From cf075d67e611160dfc835f07afdc66320155f1ab Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Tue, 2 Jun 2026 11:51:48 -0700
Subject: [PATCH 1/6] research-step: markdown plan templates + data-driven
 theory generation

---
 .gitignore                                    |   3 +
 .../skills/research-step/SKILL.md             |  37 +-
 .../research-step/assets/report_example.tex   | 620 ++++++++++++++++++
 .../skills/research-step/assets/schemas.yaml  |  24 +-
 .../assets/theorizer_mission_example.md       |  78 +++
 .../research-step/scripts/validate-output.sh  |  84 ++-
 .../data_driven_theory_generation.md          | 118 ++++
 .../templates/hypothesis_driven_research.md   |  50 ++
 .../skills/research-step/workflows/execute.md |  21 +-
 .../skills/research-step/workflows/init.md    |   2 +-
 .../skills/research-step/workflows/plan.md    | 104 +--
 plugins/asta/skills/research-step/SKILL.md    |  37 +-
 .../research-step/assets/report_example.tex   | 620 ++++++++++++++++++
 .../skills/research-step/assets/schemas.yaml  |  24 +-
 .../assets/theorizer_mission_example.md       |  78 +++
 .../research-step/scripts/validate-output.sh  |  84 ++-
 .../data_driven_theory_generation.md          | 118 ++++
 .../templates/hypothesis_driven_research.md   |  50 ++
 .../skills/research-step/workflows/execute.md |  21 +-
 .../skills/research-step/workflows/init.md    |   2 +-
 .../skills/research-step/workflows/plan.md    | 104 +--
 skills/research-step/SKILL.md                 |  37 +-
 .../research-step/assets/report_example.tex   | 620 ++++++++++++++++++
 skills/research-step/assets/schemas.yaml      |  24 +-
 .../assets/theorizer_mission_example.md       |  78 +++
 .../research-step/scripts/validate-output.sh  |  84 ++-
 .../data_driven_theory_generation.md          | 118 ++++
 .../templates/hypothesis_driven_research.md   |  50 ++
 skills/research-step/workflows/execute.md     |  21 +-
 skills/research-step/workflows/init.md        |   2 +-
 skills/research-step/workflows/plan.md        | 104 +--
 src/asta/analyze_data/poll.py                 |  69 +-
 src/asta/auto_exp_designer.py                 |  20 +
 src/asta/cli.py                               |   6 +
 src/asta/flows/__init__.py                    |   5 +
 src/asta/flows/passthrough.py                 |  15 +
 src/asta/utils/asta.conf                      |  14 +
 37 files changed, 3172 insertions(+), 374 deletions(-)
 create mode 100644 plugins/asta-preview/skills/research-step/assets/report_example.tex
 create mode 100644 plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md
 create mode 100644 plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
 create mode 100644 plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
 create mode 100644 plugins/asta/skills/research-step/assets/report_example.tex
 create mode 100644 plugins/asta/skills/research-step/assets/theorizer_mission_example.md
 create mode 100644 plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
 create mode 100644 plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
 create mode 100644 skills/research-step/assets/report_example.tex
 create mode 100644 skills/research-step/assets/theorizer_mission_example.md
 create mode 100644 skills/research-step/templates/data_driven_theory_generation.md
 create mode 100644 skills/research-step/templates/hypothesis_driven_research.md
 create mode 100644 src/asta/auto_exp_designer.py
 create mode 100644 src/asta/flows/__init__.py
 create mode 100644 src/asta/flows/passthrough.py

diff --git a/.gitignore b/.gitignore
index 2fe355b..787b618 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,7 @@ skills-lock.json
 .idea/
 *.iml
 
+# macOS
+.DS_Store
+
 .asta
diff --git a/plugins/asta-preview/skills/research-step/SKILL.md b/plugins/asta-preview/skills/research-step/SKILL.md
index 0d2fcee..0181287 100644
--- a/plugins/asta-preview/skills/research-step/SKILL.md
+++ b/plugins/asta-preview/skills/research-step/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
@@ -31,12 +31,41 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
+## Plan templates
+
+A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
+
+- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
+- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
+- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
+- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that and block it on each. (`schemas.yaml` is output shape only — no wiring.)
+- Don't add tasks the template doesn't have.
+
+Available templates:
+
+| Name | Purpose |
+|---|---|
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
+
+### Task outputs
+
+Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
+
+| Path | Role |
+|---|---|
+| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
+| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
+| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
+
+Cross-task references in `output.json` use the run-root-relative path (`.asta/tasks/<bd-id>/artifacts/<file>`); inside `output.md`, use the file-relative link form so the page renders standalone.
+
 ## Routing
 
 ### 1. Honor explicit requests
@@ -51,7 +80,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → if the closed task type is `literature_review`, `hypothesis`, `analysis`, or `synthesis`, chain to **plan** (which chains to **update-summary**); otherwise chain directly to **update-summary**.
+- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta-preview/skills/research-step/assets/report_example.tex b/plugins/asta-preview/skills/research-step/assets/report_example.tex
new file mode 100644
index 0000000..e87ebf5
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/report_example.tex
@@ -0,0 +1,620 @@
+% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
+%
+% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
+% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
+%
+% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
+% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
+% your run embeds its own figures from `artifacts/`.
+
+\documentclass[11pt]{article}
+\usepackage[margin=1in]{geometry}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{hyperref}
+\usepackage{booktabs}
+\usepackage{longtable}
+\usepackage{array}
+\usepackage{enumitem}
+\usepackage{xcolor}
+\usepackage{microtype}
+\usepackage{graphicx}
+\usepackage{titling}
+\usepackage{fancyhdr}
+\usepackage{titlesec}
+\usepackage{tabularx}
+\usepackage{tikz}
+\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
+\definecolor{paperfinderpurple}{HTML}{6D28D9}
+
+\hypersetup{
+  colorlinks=true,
+  linkcolor=blue!55!black,
+  urlcolor=blue!55!black,
+  citecolor=blue!55!black,
+}
+
+\pagestyle{fancy}
+\fancyhf{}
+\fancyhead[L]{Multi-Agent Computational Investigation}
+\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
+\fancyfoot[C]{\thepage}
+\renewcommand{\headrulewidth}{0.4pt}
+
+\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
+\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
+
+\setlength{\parskip}{0.5em}
+
+\begin{document}
+
+\begin{titlepage}
+\thispagestyle{empty}
+\vspace*{0.6in}
+\begin{center}
+{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
+\end{center}
+
+\vspace*{40pt}
+
+\noindent\makebox[\textwidth][c]{%
+\begin{tikzpicture}[
+  font=\footnotesize,
+  procbox/.style={
+    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  agentbox/.style={
+    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  paperbox/.style={
+    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  finalbox/.style={
+    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
+  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
+  node distance=0.45cm and 0.45cm,
+]
+% Band 1: discovery phase, left-to-right
+\node[procbox] (scope) {Scope \&\\Definitions};
+\node[procbox, right=of scope] (prov) {Data\\Provenance};
+\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
+\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
+\node[procbox, right=of laws] (themes) {Cluster\\Themes};
+
+% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
+\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
+\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
+\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
+\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
+\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
+
+\begin{scope}[on background layer]
+\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
+\end{scope}
+\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
+
+% Band 3: synthesis + follow-on, left-to-right
+\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
+\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
+\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
+\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
+\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
+
+\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
+
+% Band 1 arrows
+\draw[arr] (scope) -- (prov);
+\draw[arr] (prov) -- (ad);
+\draw[arr] (ad) -- (laws);
+\draw[arr] (laws) -- (themes);
+
+% Band 1 -> Band 2: straight down themes -> lit
+\draw[arr] (themes) -- (lit);
+
+% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
+\draw[arr] (lit) -- (hyp);
+\draw[arr] (hyp) -- (evid);
+\draw[arr] (evid) -- (exp);
+\draw[arr] (exp) -- (rep);
+
+% Retry self-loop on rep (black so it reads clearly)
+\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
+
+% Band 2 -> Band 3: straight down rep -> across
+\draw[arr] (rep) -- (across);
+
+% Band 3 arrows
+\draw[arr] (across) -- (theo);
+\draw[arr] (theo) -- (nov);
+\draw[arr] (nov) -- (aed);
+\draw[arr] (aed) -- (dv2);
+
+% Band 3 -> final report
+\draw[arr] (dv2) -- (rep_final);
+
+% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
+\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
+\end{tikzpicture}%
+}
+
+\vspace*{\fill}
+\begin{center}
+\footnotesize\itshape\color{gray!50!black}
+Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
+\end{center}
+\end{titlepage}
+
+\tableofcontents
+\newpage
+
+%---------------------------------------------------------------
+\section{Mission}
+
+This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
+
+We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
+
+The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
+
+%---------------------------------------------------------------
+\section{Abstract}
+
+We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
+
+\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
+\begin{itemize}[noitemsep]
+\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
+\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
+\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
+\end{itemize}
+
+\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
+
+\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
+
+%---------------------------------------------------------------
+\section{Background and Motivation}
+
+\subsection{The Pakistan WPV1 resurgence}
+
+Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
+\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
+\label{fig:national}
+\end{figure}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
+\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
+\label{fig:district}
+\end{figure}
+
+\subsection{The older-cohort hypothesis}
+
+Three lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
+
+\subsection{Prior AutoDiscovery findings}
+
+Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) yielded findings that we curated into six candidate cross-cutting laws:
+\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
+\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
+\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
+\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
+\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
+\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
+\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
+\end{description}
+
+These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
+
+%---------------------------------------------------------------
+\section{Methods}
+\label{sec:methods}
+
+\subsection{Data sources}
+
+The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
+
+\begin{itemize}[noitemsep]
+\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
+\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
+\item Pakistan Standards of Living Measurement Survey (PSLM) 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
+\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
+\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
+\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
+\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
+\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
+\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
+\end{itemize}
+
+\subsection{Computational agents and their roles}
+
+\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
+
+\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
+
+\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. It includes a calibrated novelty assessment that produces law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
+
+\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
+
+\subsection{AutoDiscovery curation and replication design}
+\label{sec:methods_ad}
+
+The four AD runs together completed 121 successful experiments. We curated cross-cutting findings at $|$surprisal$|$ $\geq 0.27$ or the system's intrinsic surprise flag set to true, grouping into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal magnitude of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
+
+For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
+
+\subsection{Cross-source robustness experiments}
+
+Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
+
+\subsection{Theorizer runs}
+
+Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
+
+\subsection{AutoExperimentDesigner follow-on protocols}
+
+After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
+
+For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
+
+\subsection{Statistical procedures}
+
+\begin{itemize}[noitemsep]
+\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
+\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
+\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
+\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
+\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Results}
+
+The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
+\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
+\label{fig:matrix}
+\end{figure}
+
+\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
+
+The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
+
+The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
+E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
+E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L1.}
+\end{table}
+
+\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
+
+\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
+
+L2 was tested in four independent ways and was refuted in all four.
+
+In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
+
+Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
+\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} Districts with at least one case of either WPV1 or cVDPV2. Marker size encodes total case volume; color encodes the same. The downward OLS slope indicates cVDPV2 (low ratio) concentrates in adult-heavy districts.}
+\label{fig:subtype}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
+E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
+E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
+E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L2.}
+\end{table}
+
+\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
+
+\subsection{Law L3 --- Two-regime household contact intensity}
+
+L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient is significant at $p{=}0.0006$ with the sign indicating the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produces $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis are recovered.
+
+\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
+
+\subsection{Law L4 --- Cross-border mobility mechanism}
+
+L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is supported by two independent experiments (E11 and E13) and localized by a third (E12), which rules out one candidate channel:
+
+\begin{itemize}[noitemsep]
+\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
+\item Experiment E13 fit a district-year Poisson with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant at coefficient 1.75, $p{=}0.079$.
+\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
+\end{itemize}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
+\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan, after which cross-border coupling intensified per Experiment E11.}
+\label{fig:pakafg}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
+E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
+E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
+E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L4.}
+\end{table}
+
+\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
+
+\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
+
+L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model failed to reject the Pol3-only model ($p{=}0.383$).
+
+The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
+
+\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
+
+%---------------------------------------------------------------
+\section{Pre-Registered Confirmatory Test of the Combined Theory}
+\label{sec:final}
+
+\subsection{Background and rationale}
+
+The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
+
+This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
+
+\subsection{AutoExperimentDesigner pre-registered protocol}
+
+The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
+
+\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the year in which national Pol3 first crosses 80\%.
+
+\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
+
+\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
+
+\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
+
+\subsection{DataVoyager execution}
+
+The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
+
+A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
+
+\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
+
+The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
+
+\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
+
+In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The ratio $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
+
+The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at $c{=}85$\%. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
+
+\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
+
+At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
+
+\subsection{Combined verdict}
+
+All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Component & Key statistic & Status \\
+\midrule
+P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
+P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
+P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
+Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
+\bottomrule
+\end{tabular}
+\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
+\end{table}
+
+%---------------------------------------------------------------
+\section{Trustworthiness Analysis}
+
+\subsection{What we can trust}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
+\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
+\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
+\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
+\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
+\end{itemize}
+
+\subsection{Key limitations}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
+\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
+\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
+\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
+\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
+\item \textbf{Theorizer corpus selection.} The PaperFinder corpus was heavily weighted toward programmatic reports and Pakistan-Afghanistan phylogenetic studies. The foundational immunological mucosal-immunity papers were under-weighted. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
+\end{itemize}
+
+\subsection{Deviations from protocol}
+
+\begin{itemize}[noitemsep]
+\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
+\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Conclusions}
+
+\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
+
+\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
+
+\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
+
+\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is approximately 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
+
+\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
+
+%---------------------------------------------------------------
+\section{Future Directions}
+
+\begin{enumerate}[noitemsep]
+\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
+\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
+\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
+\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
+\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
+\end{enumerate}
+
+%---------------------------------------------------------------
+\appendix
+
+\section{Computational Experiment Catalogue}
+\label{app:experiments}
+
+This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
+
+\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
+
+\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
+
+\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
+
+\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level (n=5), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
+
+\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
+
+\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15\text{--}49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was not statistically significant. Verdict: inconclusive (underpowered).
+
+\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
+
+\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log\bigl((\text{ES}+0.5)/(\text{cases}+0.5)\bigr)$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
+
+\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
+
+\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
+
+\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
+
+\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
+
+\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
+
+\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
+
+\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
+
+\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
+
+\section{Datasets}
+\label{app:datasets}
+
+\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
+\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
+
+\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
+
+\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
+
+\item[D4.] \textbf{Pakistan Standards of Living Measurement Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
+
+\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
+
+\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
+
+\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
+
+\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
+
+\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
+
+\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
+\end{description}
+
+\section{References}
+
+The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
+
+\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
+\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
+
+\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
+
+\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
+
+\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
+
+\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
+
+\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
+
+\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
+
+\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
+
+\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
+
+\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
+
+\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
+
+\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
+
+\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
+
+\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
+
+\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
+
+\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
+
+\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
+\end{description}
+
+\end{document}
diff --git a/plugins/asta-preview/skills/research-step/assets/schemas.yaml b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
index b840628..888db1b 100644
--- a/plugins/asta-preview/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
@@ -1,20 +1,18 @@
-# Output schemas for research-step task types.
-# Each task issue stores its realized output at metadata.research_step.output,
-# matching the shape under `output:` for its task_type.
+# Output shapes for research-step tasks. Each task stores its output at
+# metadata.research_step.output, matching the shape under `output:` for its type.
+# Wiring (which task feeds which) lives in the templates, not here.
 
 schema_version: 1
 
 task_types:
 
   scope:
-    inputs: []
     output:
       question: string                   # the precise research question
       boundaries: [string]               # what is in / out of scope
       success_criteria: [string]         # how we know we have answered it
 
   definitions:
-    inputs: [scope]
     output:
       terms:
         - name: string
@@ -22,7 +20,6 @@ task_types:
           rationale: string
 
   literature_review:
-    inputs: [scope, definitions]
     output:
       summary_path: string               # relative path; long-form context
       key_findings: [string]             # 3-10 bullets readable without opening summary_path
@@ -34,7 +31,6 @@ task_types:
           relevance: string
 
   hypothesis:
-    inputs: [scope, literature_review]
     output:
       statement: string                  # H_n: ...
       rationale: string
@@ -42,7 +38,6 @@ task_types:
       expected_evidence: [string]
 
   experiment_design:
-    inputs: [hypothesis]
     output:
       method: string
       procedure: [string]                # ordered steps
@@ -53,7 +48,6 @@ task_types:
       artifacts_expected: [string]       # paths the gathering step will produce
 
   evidence_gathering:
-    inputs: [experiment_design]
     output:
       artifacts:
         - path: string
@@ -62,8 +56,17 @@ task_types:
       log_path: string                   # what was actually run
       deviations: [string]               # ways execution diverged from design
 
+  auto_discovery:
+    output:
+      runid: string                      # the AutoDS run (created or imported)
+      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
+      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
+      surprising_nodes:
+        - id: string                     # e.g. node_3_0
+          surprise: number
+          finding: string
+
   analysis:
-    inputs: [hypothesis, evidence_gathering]
     output:
       verdict: enum [supported, refuted, inconclusive]
       confidence: number                 # 0.0 - 1.0
@@ -71,7 +74,6 @@ task_types:
       caveats: [string]
 
   synthesis:
-    inputs: [scope, analysis_*]          # all analysis issues in the epic
     output:
       answer: string                     # answer to scope.question
       supporting_hypotheses: [bd_id]
diff --git a/plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md b/plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md
new file mode 100644
index 0000000..acaa800
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md
@@ -0,0 +1,78 @@
+# Example theorizer mission statement
+
+This is a worked example of the **mission statement** passed to the theorizer in the
+`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
+run's `mission.md`; it is the prompt the theorizer receives once the per-theme
+reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
+and the per-theme findings.
+
+A well-formed theorizer mission does five things, and this example shows all five:
+
+1. **States the question** in one sentence, naming the phenomenon and the population of interest.
+2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
+3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
+4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
+5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
+
+Tagging each finding/question/constraint with its supporting experiment is what keeps
+the returned theories anchorable: downstream, `theorizer_theories` drops any theory
+without ≥1 law anchor, and this structure makes the anchor explicit.
+
+---
+
+```
+Mission: Generate theories that explain the role of populations aged 5+ years in
+Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
+findings and the open questions they leave unresolved.
+
+SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
+  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
+      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
+  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
+      strengthening significantly after 2021 (X2).
+  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
+      by under-5 population share, with under-5 share dominating 15-64 working-age
+      share (T2 retry-1).
+  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
+      dominates in adult-heavy districts (X4, p<0.001).
+  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
+      scale (T5 retry-0/1).
+  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
+      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
+      (X7).
+
+OPEN QUESTIONS (theories should address at least one):
+  Q1. What replaced national Pol3 coverage as the dominant transmission lever
+      after 2018-2019?
+  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
+      drives the case coupling intensification?
+  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
+      in young districts) appear?
+  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
+      they are NOT the dominant district-level predictor but ARE plausibly the
+      operative mobility vectors?
+
+CONSTRAINTS (refuted framings to avoid):
+  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
+      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
+  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
+      refuted at district level by T2, at province level by T2 retry-4, on the
+      silent-transmission signature by X3, and on the subtype contrast by X4.
+  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
+      by T5 retry-0/1.
+  C4. Theories centered on resident Afghan refugee populations as a static mobility
+      channel — refuted by X7.
+
+REWARDED FRAMINGS:
+  R1. Theories that explain the 2018-2019 break date in terms of immunological,
+      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
+  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
+      deportations, seasonal transit) consistent with the post-2021 intensification.
+  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
+      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
+      while WPV1 retains a pediatric profile.
+  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
+      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
+  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
+      (mobility) — the two laws DV reproduced.
+```
diff --git a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
index 0f5a84e..7523283 100755
--- a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
@@ -1,14 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh — structural validation of a research_step output JSON.
 #
-# Usage: validate-output.sh <task_type> <metadata-json-file>
+# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the canonical metadata envelope
+#   2. carries the metadata wrapper
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
+# If [task-dir] (e.g. .asta/tasks/<id>) is given, also checks its output.md (present,
+# non-empty, has links) and scans output.md + report.tex for unlinked entities.
 #
 # Exit codes:
 #   0  — valid
@@ -16,18 +18,27 @@
 #   3  — unknown task_type
 #   4  — missing required field
 #   5  — task_type mismatch with envelope
+#   6  — required output.md missing (only when [task-dir] supplied)
+#   7  — output.md empty or a stub (only when [task-dir] supplied)
+#   8  — output.md has no markdown links (only when [task-dir] supplied)
+#   9  — a named entity is unlinked (only when [task-dir] supplied)
+#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
+#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
+#           <3 embedded figures (14), a required section is missing (15)
 #
-# This is structural validation only. Quality validation (sound prediction,
-# sane confidence, valid citations) is out of scope per execute.md.
+# Structural checks only — required fields, working links, and the report's basic
+# pieces. It can't tell whether the science is sound or the writing is good; that's
+# the agent's job.
 set -euo pipefail
 
-if [[ $# -ne 2 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file>" >&2
+if [[ $# -lt 2 || $# -gt 3 ]]; then
+  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
   exit 1
 fi
 
 task_type="$1"
 file="$2"
+task_dir="${3:-}"
 
 if ! jq -e . "$file" > /dev/null 2>&1; then
   echo "validate-output: $file is not valid JSON" >&2
@@ -42,16 +53,17 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
+  auto_discovery)     required="runid status experiments_path" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
     echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis" >&2
+    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
     exit 3
     ;;
 esac
 
-# Envelope must carry the matching task_type so we don't validate scope JSON
+# The wrapper must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -63,7 +75,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Envelope shape sanity.
+# Wrapper shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -99,4 +111,58 @@ case "$task_type" in
     ;;
 esac
 
+# output.md document-quality gate. Every task must produce a human-readable
+# output.md (skill "Task outputs" table) that links the entities it names.
+if [[ -n "$task_dir" ]]; then
+  md="$task_dir/output.md"
+  if [[ ! -f "$md" ]]; then
+    echo "validate-output: required output.md not found at '$md'" >&2
+    exit 6
+  fi
+  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
+    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
+    exit 7
+  fi
+  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
+    echo "validate-output: output.md has no markdown links" >&2
+    exit 8
+  fi
+  # Strip links, then flag any named entity still bare in output.md / report.tex.
+  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
+    [[ -f "$f" ]] && perl -ne '
+      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
+      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
+      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
+    ' "$f"
+  done) || true
+  if [[ -n "$unlinked" ]]; then
+    echo "$unlinked" >&2
+    echo "validate-output: named entities above are unlinked" >&2
+    exit 9
+  fi
+
+  # The report's basics. Only the report node makes report.tex; when it exists,
+  # check it has what report_example.tex has. Each failure points back to it.
+  rpt="$task_dir/artifacts/report.tex"
+  if [[ -f "$rpt" ]]; then
+    ref="assets/report_example.tex"
+    rfail() {
+      echo "report-gate: $1" >&2
+      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
+      echo "     its depth and citation density before retrying." >&2
+      exit "$2"
+    }
+    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
+    grep -q '\\begin{tikzpicture}\|\\includegraphics' \
+      <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
+      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
+    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
+    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
+    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
+    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
+      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
+    done
+  fi
+fi
+
 echo "ok"
diff --git a/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
new file mode 100644
index 0000000..756635c
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
@@ -0,0 +1,118 @@
+---
+name: data_driven_theory_generation
+description: |
+  See which of an AutoDS run's most surprising findings hold up on independent
+  data, then build theories on the ones that do and test the best with a new experiment.
+---
+
+# Data-driven theory generation
+
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  data_provenance["Data provenance"]
+  definitions --> data_provenance
+  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
+  data_provenance --> auto_discovery
+  subgraph sub1["for each of the 10 surprising findings"]
+    direction TB
+    hypothesis["Restate finding"]
+    literature_review["Literature search"]
+    experiment_design["Pre-register test"]
+    evidence_gathering["Find independent data"]
+    analysis["Replicate"]
+    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
+    analysis -- "retry: inconclusive → re-spec" --> experiment_design
+    analysis -- "retry: bad data → re-locate" --> evidence_gathering
+  end
+  auto_discovery --> hypothesis
+  replication_synthesis["Replication summary (k of 10, by mechanism)"]
+  analysis --> replication_synthesis
+  theorizer_theories["Theorizer-grounded theories"]
+  replication_synthesis --> theorizer_theories
+  novelty["Score theories for novelty"]
+  theorizer_theories --> novelty
+  subgraph sub2["for each of the top 3 theories"]
+    direction TB
+    followon_exp_design["Pre-register experiment (AED)"]
+    followon_evidence["Find new data"]
+    followon_analysis["Run, or leave as a proposal"]
+    followon_exp_design --> followon_evidence --> followon_analysis
+  end
+  novelty --> followon_exp_design
+  report["Closing report"]
+  followon_analysis --> report
+  report --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
+| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
+| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
+| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
+| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
+| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
+| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on experiments then test the top 3, ranked by novelty × feasibility. | `asta-preview:generate-theories` |
+| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
+| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
+| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+
+The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
+
+## Running DataVoyager
+
+Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only call data `data_unavailable` once the ledger shows the trace failed — a 403/404 on someone else's resource isn't proof — then leave that `analysis` blocked, not closed.
+
+A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim (`≤3` marks a cap of three retries at that step):
+
+| What DataVoyager did | Go back to | Fix |
+|---|---|---|
+| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
+| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
+| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
+
+## mission.md
+
+- `run_pointer:` — the AutoDS run to import (omit to create one).
+- `datasets[]` — input dataset URIs for a new run.
+- A focus statement in the body — the question under study.
+
+Unless the user explicitly says to use local inputs only, fetch external public data for replication.
+
+## Writing the report and outputs
+
+These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
+
+- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
+- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
+- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text or `node_3_0`:
+
+  | thing | link to |
+  |---|---|
+  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
+  | paper | the asta document, paper URL, or `data_provenance` entry |
+  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
+  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
+  | dataset | the file under `inputs/`, or the Datasets appendix |
+  | experiment E-number | its appendix entry |
+
+- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
+- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
+- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md b/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
new file mode 100644
index 0000000..eb3c847
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
@@ -0,0 +1,50 @@
+---
+name: hypothesis_driven_research
+description: |
+  Literature-grounded hypothesis generation. Survey the literature, raise a
+  hypothesis per gap, test each, and write a closing report.
+---
+
+# Hypothesis-driven research
+
+Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  lit_review["Literature review"]
+  definitions --> lit_review
+  subgraph sub1["for each gap"]
+    direction TB
+    hypothesis["Hypothesis"]
+    experiment_design["Experiment design"]
+    evidence_gathering["Evidence gathering"]
+    analysis["Analysis"]
+    hypothesis --> experiment_design --> evidence_gathering --> analysis
+  end
+  lit_review --> hypothesis
+  closing["Closing synthesis"]
+  analysis --> closing
+  closing --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | One line: the question under study. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
+| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]`; each gap becomes one hypothesis. | `asta-preview:find-literature` |
+| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
+| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Get the verdict from DataVoyager (`asta analyze-data submit`), framed on the hypothesis with the gathered data. It must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
+| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
+
+The `hypothesis` tasks are filled and closed at creation from the literature gaps — see plan.md.
diff --git a/plugins/asta-preview/skills/research-step/workflows/execute.md b/plugins/asta-preview/skills/research-step/workflows/execute.md
index 5fba9ea..61bebc7 100644
--- a/plugins/asta-preview/skills/research-step/workflows/execute.md
+++ b/plugins/asta-preview/skills/research-step/workflows/execute.md
@@ -1,6 +1,6 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
 
 ## Preconditions
 
@@ -9,22 +9,17 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). Hypothesis tasks are normally auto-resolved at creation by **plan**, so they should not appear here. If one does, it means the gap text was too thin for plan to fill the output without inventing content — flag this to the user and ask whether to refine the source `literature_review` first.
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce a JSON object matching the schema. For schema fields ending in `_path`, write the file to disk first and put the relative path in the JSON.
-6. **Validate structurally.** Run `scripts/validate-output.sh <task_type> <metadata-json-file>`. It checks the envelope (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks for the high-leverage cases (e.g., `analysis.verdict` enum, `analysis.confidence` range). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Materialize the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan or update-summary.** Some closed task types unlock new graph structure; others don't. Decide based on the closed task's `task_type`:
-
-   | Closed task_type | Hand off to |
-   |---|---|
-   | `literature_review`, `hypothesis`, `analysis`, `synthesis` | **plan** (with this issue as the source). `plan` then chains to **update-summary**. Note: `hypothesis` only reaches this branch in the rare case it was left open at creation; the normal path is plan→auto-resolve. |
-   | `scope`, `definitions`, `experiment_design`, `evidence_gathering` | **update-summary** directly. |
+5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
 
-   Either path ends with `summary.md` rebuilt.
+   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
+6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so that `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the metadata wrapper (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
+7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
+8. **Close.** `bd close <id>`.
+9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
 
 ## Notes on output files
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/init.md b/plugins/asta-preview/skills/research-step/workflows/init.md
index fd11be3..4df19c0 100644
--- a/plugins/asta-preview/skills/research-step/workflows/init.md
+++ b/plugins/asta-preview/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
+After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
 
 ## Preconditions
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/plan.md b/plugins/asta-preview/skills/research-step/workflows/plan.md
index c5ffb2d..e0a158d 100644
--- a/plugins/asta-preview/skills/research-step/workflows/plan.md
+++ b/plugins/asta-preview/skills/research-step/workflows/plan.md
@@ -1,99 +1,61 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier (scope, definitions, literature_review) from `mission.md`.
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
+- **replan** — an epic exists. Add the next tasks after one closes.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized — else run **init**.
+- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing, send the user to **brainstorm**.
+- **replan**: `scripts/epic-root.sh` says `status: found`. A source task passed in (usually by **execute**) is closed with a populated output.
 
-## Issue metadata convention
-
-Every task issue carries:
-
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
-
-The mission epic additionally carries `epic_root: true`.
+Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
 
 ## Mode selection
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
 
-## Bootstrap mode
+## Bootstrap
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
+1. Read `mission.md` (abort to **brainstorm** if missing).
+2. Create the epic:
    ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. **Create the initial frontier.** Three `task` issues with the metadata convention above:
-   - `scope: <one-line>` — `inputs: []`
-   - `definitions: <one-line>` — `inputs: [<scope-id>]`
-   - `literature_review: <one-line>` — `inputs: [<scope-id>, <definitions-id>]`
-4. **Add edges.**
-   - `parent-child` from each frontier task to the epic
-   - `blocks`: scope → definitions; scope → literature_review; definitions → literature_review
-5. **Report.** Print the epic ID and the three task IDs.
-
-## Replan mode
-
-Read the source task's task_type and output:
-
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
+4. Add edges: `parent-child` from each task to the epic, and `blocks` from every task listed in a task's `inputs` to that task.
 
-Apply this table:
+## Replan
 
-| Source task_type | Action |
-|---|---|
-| `literature_review` | For each gap in `output.gaps`, create a `hypothesis` task with `inputs: [<scope-id>, <source-id>]`. Edges: `parent-child` to epic; `blocks` from the source. **Populate `metadata.research_step.output` at creation time** (see below) and close the issue immediately — the gap text already contains the statement, rationale, and prediction in prose, so there is no separate `execute` pass for hypotheses. |
-| `hypothesis` | Create the chain `experiment_design` → `evidence_gathering` → `analysis`, each `blocks` the next. `experiment_design` depends on the hypothesis (via `inputs`); `analysis` depends on both the hypothesis and the new `evidence_gathering`. All three get `parent-child` to the epic. |
-| `analysis` | If every `hypothesis` in the epic now has a closed `analysis`, create one `synthesis` task with `inputs` listing all analysis IDs and the scope ID. `parent-child` to epic; `blocks` from each analysis. Otherwise no-op. |
-| `synthesis` | If `output.open_questions` is non-empty, **stop and ask the user** before creating new `hypothesis` tasks. If approved, create them with a `discovered-from` edge back to the synthesis (in addition to the usual edges). |
-| `scope`, `definitions`, `experiment_design`, `evidence_gathering` | No replan. Report no-op and stop. |
+The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
+- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
+- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
+- **Hypotheses** are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until the frontier is tasks that need an execute pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
 
-### Auto-resolving hypothesis tasks
+### Filling in hypotheses
 
-When creating a `hypothesis` from a literature_review gap:
+A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
 
-1. Derive the four output fields directly from the gap text and surrounding `literature_review` output (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why this gap implies the claim
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Validate with `scripts/validate-output.sh hypothesis <metadata-json-file>` before persisting.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
+2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
+3. Write `output.json` and `output.md` (the readable hypothesis; link any finding or law it names rather than writing a bare `node_x_y`).
+4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
+5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+If the source (a gap or a finding) is too thin to fill honestly, leave the hypothesis open for a real `execute` pass instead.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**.
 
-## Out of scope
+## Not here
 
-- Running tasks or producing outputs. That belongs to **execute**.
-- Environment setup (installing `bd`/`jq`, `bd init`). That belongs to **init**.
-- Editing `mission.md`. That belongs to **brainstorm**.
-- Validating output quality.
+Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
diff --git a/plugins/asta/skills/research-step/SKILL.md b/plugins/asta/skills/research-step/SKILL.md
index 0d2fcee..0181287 100644
--- a/plugins/asta/skills/research-step/SKILL.md
+++ b/plugins/asta/skills/research-step/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
@@ -31,12 +31,41 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
+## Plan templates
+
+A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
+
+- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
+- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
+- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
+- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that, and block the task on each of those inputs. (`schemas.yaml` is output shape only — no wiring.)
+- Don't add tasks the template doesn't have.
+
+Available templates:
+
+| Name | Purpose |
+|---|---|
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
+
+### Task outputs
+
+Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
+
+| Path | Role |
+|---|---|
+| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
+| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
+| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
+
+Cross-task references in `output.json` use the run-root-relative path (`.asta/tasks/<bd-id>/artifacts/<file>`); inside `output.md`, use the file-relative link form so the page renders standalone.
+
 ## Routing
 
 ### 1. Honor explicit requests
@@ -51,7 +80,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → if the closed task type is `literature_review`, `hypothesis`, `analysis`, or `synthesis`, chain to **plan** (which chains to **update-summary**); otherwise chain directly to **update-summary**.
+- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta/skills/research-step/assets/report_example.tex b/plugins/asta/skills/research-step/assets/report_example.tex
new file mode 100644
index 0000000..e87ebf5
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/report_example.tex
@@ -0,0 +1,620 @@
+% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
+%
+% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
+% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
+%
+% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
+% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
+% your run embeds its own figures from `artifacts/`.
+
+\documentclass[11pt]{article}
+\usepackage[margin=1in]{geometry}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{hyperref}
+\usepackage{booktabs}
+\usepackage{longtable}
+\usepackage{array}
+\usepackage{enumitem}
+\usepackage{xcolor}
+\usepackage{microtype}
+\usepackage{graphicx}
+\usepackage{titling}
+\usepackage{fancyhdr}
+\usepackage{titlesec}
+\usepackage{tabularx}
+\usepackage{tikz}
+\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
+\definecolor{paperfinderpurple}{HTML}{6D28D9}
+
+\hypersetup{
+  colorlinks=true,
+  linkcolor=blue!55!black,
+  urlcolor=blue!55!black,
+  citecolor=blue!55!black,
+}
+
+\pagestyle{fancy}
+\fancyhf{}
+\fancyhead[L]{Multi-Agent Computational Investigation}
+\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
+\fancyfoot[C]{\thepage}
+\renewcommand{\headrulewidth}{0.4pt}
+
+\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
+\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
+
+\setlength{\parskip}{0.5em}
+
+\begin{document}
+
+\begin{titlepage}
+\thispagestyle{empty}
+\vspace*{0.6in}
+\begin{center}
+{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
+\end{center}
+
+\vspace*{40pt}
+
+\noindent\makebox[\textwidth][c]{%
+\begin{tikzpicture}[
+  font=\footnotesize,
+  procbox/.style={
+    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  agentbox/.style={
+    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  paperbox/.style={
+    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  finalbox/.style={
+    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
+  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
+  node distance=0.45cm and 0.45cm,
+]
+% Band 1: discovery phase, left-to-right
+\node[procbox] (scope) {Scope \&\\Definitions};
+\node[procbox, right=of scope] (prov) {Data\\Provenance};
+\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
+\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
+\node[procbox, right=of laws] (themes) {Cluster\\Themes};
+
+% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
+\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
+\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
+\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
+\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
+\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
+
+\begin{scope}[on background layer]
+\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
+\end{scope}
+\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
+
+% Band 3: synthesis + follow-on, left-to-right
+\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
+\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
+\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
+\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
+\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
+
+\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
+
+% Band 1 arrows
+\draw[arr] (scope) -- (prov);
+\draw[arr] (prov) -- (ad);
+\draw[arr] (ad) -- (laws);
+\draw[arr] (laws) -- (themes);
+
+% Band 1 -> Band 2: straight down themes -> lit
+\draw[arr] (themes) -- (lit);
+
+% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
+\draw[arr] (lit) -- (hyp);
+\draw[arr] (hyp) -- (evid);
+\draw[arr] (evid) -- (exp);
+\draw[arr] (exp) -- (rep);
+
+% Retry self-loop on rep (black so it reads clearly)
+\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
+
+% Band 2 -> Band 3: straight down rep -> across
+\draw[arr] (rep) -- (across);
+
+% Band 3 arrows
+\draw[arr] (across) -- (theo);
+\draw[arr] (theo) -- (nov);
+\draw[arr] (nov) -- (aed);
+\draw[arr] (aed) -- (dv2);
+
+% Band 3 -> final report
+\draw[arr] (dv2) -- (rep_final);
+
+% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
+\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
+\end{tikzpicture}%
+}
+
+\vspace*{\fill}
+\begin{center}
+\footnotesize\itshape\color{gray!50!black}
+Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
+\end{center}
+\end{titlepage}
+
+\tableofcontents
+\newpage
+
+%---------------------------------------------------------------
+\section{Mission}
+
+This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
+
+We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
+
+The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
+
+%---------------------------------------------------------------
+\section{Abstract}
+
+We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
+
+\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
+\begin{itemize}[noitemsep]
+\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
+\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
+\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
+\end{itemize}
+
+\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
+
+\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
+
+%---------------------------------------------------------------
+\section{Background and Motivation}
+
+\subsection{The Pakistan WPV1 resurgence}
+
+Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
+\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
+\label{fig:national}
+\end{figure}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
+\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
+\label{fig:district}
+\end{figure}
+
+\subsection{The older-cohort hypothesis}
+
+Three lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
+
+\subsection{Prior AutoDiscovery findings}
+
+Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) curated into six candidate cross-cutting laws:
+\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
+\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
+\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
+\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
+\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
+\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
+\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
+\end{description}
+
+These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
+
+%---------------------------------------------------------------
+\section{Methods}
+\label{sec:methods}
+
+\subsection{Data sources}
+
+The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
+
+\begin{itemize}[noitemsep]
+\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
+\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
+\item Pakistan Standards of Living Measurement Survey (PSLM) 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
+\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
+\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
+\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
+\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
+\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
+\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
+\end{itemize}
+
+\subsection{Computational agents and their roles}
+
+\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
+
+\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
+
+\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. Includes a calibrated novelty assessment producing law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
+
+\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
+
+\subsection{AutoDiscovery curation and replication design}
+\label{sec:methods_ad}
+
+The four AD runs together completed 121 successful experiments. We curated cross-cutting findings with $|$surprisal$|$ $\geq 0.27$ or with the system's intrinsic surprise flag set to true, grouping them into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal value of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
+
+For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
+
+\subsection{Cross-source robustness experiments}
+
+Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
+
+\subsection{Theorizer runs}
+
+Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
+
+\subsection{AutoExperimentDesigner follow-on protocols}
+
+After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
+
+For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
+
+\subsection{Statistical procedures}
+
+\begin{itemize}[noitemsep]
+\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
+\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
+\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
+\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
+\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Results}
+
+The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
+\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
+\label{fig:matrix}
+\end{figure}
+
+\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
+
+The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
+
+The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
+E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
+E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L1.}
+\end{table}
+
+\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
+
+\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
+
+L2 was tested in four experiments (E3, E4, E8, and E9) and was refuted in all four.
+
+In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
+
+Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
+\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} The 40 districts that reported both WPV1 and cVDPV2 cases. Marker size and color both encode total case volume. The downward OLS slope indicates that cVDPV2 (low ratio) concentrates in adult-heavy districts.}
+\label{fig:subtype}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
+E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
+E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
+E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L2.}
+\end{table}
+
+\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
+
+\subsection{Law L3 --- Two-regime household contact intensity}
+
+L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient is significant at $p{=}0.0006$ with the sign indicating the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produces $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis are recovered.
+
+\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
+
+\subsection{Law L4 --- Cross-border mobility mechanism}
+
+L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is strongly supported by three independent experiments:
+
+\begin{itemize}[noitemsep]
+\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
+\item Experiment E13 fit a district-year Poisson regression with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant (coefficient 1.75, $p{=}0.079$).
+\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
+\end{itemize}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
+\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan after which cross-border coupling intensified per Experiment E11.}
+\label{fig:pakafg}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
+E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
+E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
+E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L4.}
+\end{table}
+
+\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
+
+\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
+
+L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model failed to reject the Pol3-only model ($p{=}0.383$).
+
+The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
+
+\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
+
+%---------------------------------------------------------------
+\section{Pre-Registered Confirmatory Test of the Combined Theory}
+\label{sec:final}
+
+\subsection{Background and rationale}
+
+The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
+
+This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
+
+\subsection{AutoExperimentDesigner pre-registered protocol}
+
+The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
+
+\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the first year in which Pol3 crosses 80\%.
+
+\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
+
+\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
+
+\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
+
+\subsection{DataVoyager execution}
+
+The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
+
+A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
+
+\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
+
+The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
+
+\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
+
+In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The ratio $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
+
+The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at the 85\% threshold. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
+
+\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
+
+At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
+
+\subsection{Combined verdict}
+
+All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Component & Key statistic & Status \\
+\midrule
+P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
+P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
+P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
+Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
+\bottomrule
+\end{tabular}
+\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
+\end{table}
+
+%---------------------------------------------------------------
+\section{Trustworthiness Analysis}
+
+\subsection{What we can trust}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
+\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
+\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
+\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
+\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
+\end{itemize}
+
+\subsection{Key limitations}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
+\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
+\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
+\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
+\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
+\item \textbf{Theorizer corpus selection.} The PaperFinder corpus heavily indexed on programmatic reports and Pakistan-Afghanistan phylogenetic studies. The foundational immunological mucosal-immunity papers were less weighted. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
+\end{itemize}
+
+\subsection{Deviations from protocol}
+
+\begin{itemize}[noitemsep]
+\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
+\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Conclusions}
+
+\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
+
+\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
+
+\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
+
+\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is approximately 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
+
+\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
+
+%---------------------------------------------------------------
+\section{Future Directions}
+
+\begin{enumerate}[noitemsep]
+\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
+\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
+\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
+\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
+\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
+\end{enumerate}
+
+%---------------------------------------------------------------
+\appendix
+
+\section{Computational Experiment Catalogue}
+\label{app:experiments}
+
+This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
+
+\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
+
+\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
+
+\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
+
+\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level (n=5), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
+
+\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
+
+\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15\text{--}49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was statistically insignificant. Verdict: inconclusive (underpowered).
+
+\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
+
+\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log\bigl((\text{ES}+0.5)/(\text{cases}+0.5)\bigr)$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
+
+\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
+
+\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
+
+\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
+
+\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
+
+\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
+
+\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
+
+\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
+
+\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
+
+\section{Datasets}
+\label{app:datasets}
+
+\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
+\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
+
+\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
+
+\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
+
+\item[D4.] \textbf{Pakistan Social and Living Standards Measurement (PSLM) Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
+
+\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
+
+\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
+
+\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
+
+\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
+
+\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
+
+\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
+\end{description}
+
+\section{References}
+
+The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
+
+\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
+\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
+
+\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
+
+\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
+
+\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
+
+\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
+
+\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
+
+\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
+
+\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
+
+\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
+
+\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
+
+\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
+
+\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
+
+\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
+
+\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
+
+\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
+
+\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
+
+\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
+\end{description}
+
+\end{document}
diff --git a/plugins/asta/skills/research-step/assets/schemas.yaml b/plugins/asta/skills/research-step/assets/schemas.yaml
index b840628..888db1b 100644
--- a/plugins/asta/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta/skills/research-step/assets/schemas.yaml
@@ -1,20 +1,18 @@
-# Output schemas for research-step task types.
-# Each task issue stores its realized output at metadata.research_step.output,
-# matching the shape under `output:` for its task_type.
+# Output shapes for research-step tasks. Each task stores its output at
+# metadata.research_step.output, matching the shape under `output:` for its type.
+# Wiring (which task feeds which) lives in the templates, not here.
 
 schema_version: 1
 
 task_types:
 
   scope:
-    inputs: []
     output:
       question: string                   # the precise research question
       boundaries: [string]               # what is in / out of scope
       success_criteria: [string]         # how we know we have answered it
 
   definitions:
-    inputs: [scope]
     output:
       terms:
         - name: string
@@ -22,7 +20,6 @@ task_types:
           rationale: string
 
   literature_review:
-    inputs: [scope, definitions]
     output:
       summary_path: string               # relative path; long-form context
       key_findings: [string]             # 3-10 bullets readable without opening summary_path
@@ -34,7 +31,6 @@ task_types:
           relevance: string
 
   hypothesis:
-    inputs: [scope, literature_review]
     output:
       statement: string                  # H_n: ...
       rationale: string
@@ -42,7 +38,6 @@ task_types:
       expected_evidence: [string]
 
   experiment_design:
-    inputs: [hypothesis]
     output:
       method: string
       procedure: [string]                # ordered steps
@@ -53,7 +48,6 @@ task_types:
       artifacts_expected: [string]       # paths the gathering step will produce
 
   evidence_gathering:
-    inputs: [experiment_design]
     output:
       artifacts:
         - path: string
@@ -62,8 +56,17 @@ task_types:
       log_path: string                   # what was actually run
       deviations: [string]               # ways execution diverged from design
 
+  auto_discovery:
+    output:
+      runid: string                      # the AutoDS run (created or imported)
+      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
+      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
+      surprising_nodes:
+        - id: string                     # e.g. node_3_0
+          surprise: number
+          finding: string
+
   analysis:
-    inputs: [hypothesis, evidence_gathering]
     output:
       verdict: enum [supported, refuted, inconclusive]
       confidence: number                 # 0.0 - 1.0
@@ -71,7 +74,6 @@ task_types:
       caveats: [string]
 
   synthesis:
-    inputs: [scope, analysis_*]          # all analysis issues in the epic
     output:
       answer: string                     # answer to scope.question
       supporting_hypotheses: [bd_id]
diff --git a/plugins/asta/skills/research-step/assets/theorizer_mission_example.md b/plugins/asta/skills/research-step/assets/theorizer_mission_example.md
new file mode 100644
index 0000000..acaa800
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/theorizer_mission_example.md
@@ -0,0 +1,78 @@
+# Example theorizer mission statement
+
+This is a worked example of the **mission statement** passed to the theorizer in the
+`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
+run's `mission.md`; it is the prompt the theorizer receives once the per-theme
+reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
+and the per-theme findings.
+
+A well-formed theorizer mission does five things, and this example shows all five:
+
+1. **States the question** in one sentence, naming the phenomenon and the population of interest.
+2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
+3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
+4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
+5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
+
+Tagging each finding and constraint with its supporting experiment is what keeps
+the returned theories anchorable: downstream, `theorizer_theories` drops any theory
+without ≥1 law anchor, and this structure makes the anchor explicit.
+
+---
+
+```
+Mission: Generate theories that explain the role of populations aged 5+ years in
+Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
+findings and the open questions they leave unresolved.
+
+SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
+  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
+      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
+  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
+      strengthening significantly after 2021 (X2).
+  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
+      by under-5 population share, with under-5 share dominating 15-64 working-age
+      share (T2 retry-1).
+  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
+      dominates in adult-heavy districts (X4, p<0.001).
+  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
+      scale (T5 retry-0/1).
+  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
+      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
+      (X7).
+
+OPEN QUESTIONS (theories should address at least one):
+  Q1. What replaced national Pol3 coverage as the dominant transmission lever
+      after 2018-2019?
+  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
+      drives the case coupling intensification?
+  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
+      in young districts) appear?
+  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
+      they are NOT the dominant district-level predictor but ARE plausibly the
+      operative mobility vectors?
+
+CONSTRAINTS (refuted framings to avoid):
+  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
+      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
+  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
+      refuted at district by T2, at province by T2 retry-4, on silent-transmission
+      signature by X3, and on subtype contrast by X4.
+  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
+      by T5 retry-0/1.
+  C4. Theories centered on resident Afghan refugee populations as a static mobility
+      channel — refuted by X7.
+
+REWARDED FRAMINGS:
+  R1. Theories that explain the 2018-2019 break date in terms of immunological,
+      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
+  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
+      deportations, seasonal transit) consistent with the post-2021 intensification.
+  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
+      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
+      while WPV1 retains a pediatric profile.
+  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
+      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
+  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
+      (mobility) — the two laws DV reproduced.
+```
diff --git a/plugins/asta/skills/research-step/scripts/validate-output.sh b/plugins/asta/skills/research-step/scripts/validate-output.sh
index 0f5a84e..7523283 100755
--- a/plugins/asta/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta/skills/research-step/scripts/validate-output.sh
@@ -1,14 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh — structural validation of a research_step output JSON.
 #
-# Usage: validate-output.sh <task_type> <metadata-json-file>
+# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the canonical metadata envelope
+#   2. carries the metadata wrapper
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
+# If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
+# checks on its output.md: present, non-empty, has links, no unlinked entities.
 #
 # Exit codes:
 #   0  — valid
@@ -16,18 +18,27 @@
 #   3  — unknown task_type
 #   4  — missing required field
 #   5  — task_type mismatch with envelope
+#   6  — required output.md missing (only when [task-dir] supplied)
+#   7  — output.md empty or a stub (only when [task-dir] supplied)
+#   8  — output.md has no markdown links (only when [task-dir] supplied)
+#   9  — a named entity is unlinked (only when [task-dir] supplied)
+#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
+#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
+#           <3 embedded figures (14), a required section is missing (15)
 #
-# This is structural validation only. Quality validation (sound prediction,
-# sane confidence, valid citations) is out of scope per execute.md.
+# Structural checks only — required fields, working links, and the report's basic
+# pieces. It can't tell whether the science is sound or the writing is good; that's
+# the agent's job.
 set -euo pipefail
 
-if [[ $# -ne 2 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file>" >&2
+if [[ $# -lt 2 || $# -gt 3 ]]; then
+  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
   exit 1
 fi
 
 task_type="$1"
 file="$2"
+task_dir="${3:-}"
 
 if ! jq -e . "$file" > /dev/null 2>&1; then
   echo "validate-output: $file is not valid JSON" >&2
@@ -42,16 +53,17 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
+  auto_discovery)     required="runid status experiments_path" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
     echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis" >&2
+    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
     exit 3
     ;;
 esac
 
-# Envelope must carry the matching task_type so we don't validate scope JSON
+# The wrapper must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -63,7 +75,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Envelope shape sanity.
+# Wrapper shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -99,4 +111,58 @@ case "$task_type" in
     ;;
 esac
 
+# output.md document-quality gate. Every task must produce a human-readable
+# output.md (skill "Task outputs" table) that links the entities it names.
+if [[ -n "$task_dir" ]]; then
+  md="$task_dir/output.md"
+  if [[ ! -f "$md" ]]; then
+    echo "validate-output: required output.md not found at '$md'" >&2
+    exit 6
+  fi
+  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
+    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
+    exit 7
+  fi
+  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
+    echo "validate-output: output.md has no markdown links" >&2
+    exit 8
+  fi
+  # Strip links, then flag any named entity still bare in output.md / report.tex.
+  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
+    [[ -f "$f" ]] && perl -ne '
+      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
+      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
+      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
+    ' "$f"
+  done) || true
+  if [[ -n "$unlinked" ]]; then
+    echo "$unlinked" >&2
+    echo "validate-output: named entities above are unlinked" >&2
+    exit 9
+  fi
+
+  # The report's basics. Only the report node makes report.tex; when it exists,
+  # check it has what report_example.tex has. Each failure points back to it.
+  rpt="$task_dir/artifacts/report.tex"
+  if [[ -f "$rpt" ]]; then
+    ref="assets/report_example.tex"
+    rfail() {  # rfail <message> <exit-code>: report a failed report-gate check and stop
+      echo "report-gate: $1" >&2  # $1 describes the check that failed; all output goes to stderr
+      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
+      echo "     its depth and citation density before retrying." >&2
+      exit "$2"  # $2 is the exit status for this check; the calls below pass 10-15
+    }
+    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
+    grep -q '\\begin{tikzpicture}\|\\includegraphics' \
+      <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
+      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
+    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
+    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
+    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
+    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
+      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
+    done
+  fi
+fi
+
 echo "ok"
diff --git a/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
new file mode 100644
index 0000000..756635c
--- /dev/null
+++ b/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
@@ -0,0 +1,118 @@
+---
+name: data_driven_theory_generation
+description: |
+  See which of an AutoDS run's most surprising findings hold up on independent
+  data, then build theories on the ones that do and test the best with a new experiment.
+---
+
+# Data-driven theory generation
+
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  data_provenance["Data provenance"]
+  definitions --> data_provenance
+  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
+  data_provenance --> auto_discovery
+  subgraph sub1["for each of the 10 surprising findings"]
+    direction TB
+    hypothesis["Restate finding"]
+    literature_review["Literature search"]
+    experiment_design["Pre-register test"]
+    evidence_gathering["Find independent data"]
+    analysis["Replicate"]
+    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
+    analysis -- "retry: inconclusive → re-spec" --> experiment_design
+    analysis -- "retry: bad data → re-locate" --> evidence_gathering
+  end
+  auto_discovery --> hypothesis
+  replication_synthesis["Replication summary (k of 10, by mechanism)"]
+  analysis --> replication_synthesis
+  theorizer_theories["Theorizer-grounded theories"]
+  replication_synthesis --> theorizer_theories
+  novelty["Score theories for novelty"]
+  theorizer_theories --> novelty
+  subgraph sub2["for each of the top 3 theories"]
+    direction TB
+    followon_exp_design["Pre-register experiment (AED)"]
+    followon_evidence["Find new data"]
+    followon_analysis["Run, or leave as a proposal"]
+    followon_exp_design --> followon_evidence --> followon_analysis
+  end
+  novelty --> followon_exp_design
+  report["Closing report"]
+  followon_analysis --> report
+  report --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
+| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
+| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
+| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
+| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
+| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
+| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on stage tests the top 3 theories by novelty × feasibility. | `asta-preview:generate-theories` |
+| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
+| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
+| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+
+The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
+
+## Running DataVoyager
+
+Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only mark data as `data_unavailable` once the ledger shows the trace failed — a 403/404 on someone else's resource isn't proof — and in that case leave the `analysis` blocked, not closed.
+
+A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim:
+
+| What DataVoyager did | Go back to | Fix |
+|---|---|---|
+| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
+| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
+| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
+
+## mission.md
+
+- `run_pointer:` — the AutoDS run to import (omit to create one).
+- `datasets[]` — input dataset URIs for a new run.
+- A focus statement in the body — the question under study.
+
+Unless the user explicitly says to use local inputs only, fetch external public data for replication.
+
+## Writing the report and outputs
+
+These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
+
+- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
+- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
+- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text or `node_3_0`:
+
+  | thing | link to |
+  |---|---|
+  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
+  | paper | the asta document, paper URL, or `data_provenance` entry |
+  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
+  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
+  | dataset | the file under `inputs/`, or the Datasets appendix |
+  | experiment E-number | its appendix entry |
+
+- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
+- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
+- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md b/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
new file mode 100644
index 0000000..eb3c847
--- /dev/null
+++ b/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
@@ -0,0 +1,50 @@
+---
+name: hypothesis_driven_research
+description: |
+  Literature-grounded hypothesis generation. Survey the literature, raise a
+  hypothesis per gap, test each, and write a closing report.
+---
+
+# Hypothesis-driven research
+
+Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  lit_review["Literature review"]
+  definitions --> lit_review
+  subgraph sub1["for each gap"]
+    direction TB
+    hypothesis["Hypothesis"]
+    experiment_design["Experiment design"]
+    evidence_gathering["Evidence gathering"]
+    analysis["Analysis"]
+    hypothesis --> experiment_design --> evidence_gathering --> analysis
+  end
+  lit_review --> hypothesis
+  closing["Closing synthesis"]
+  analysis --> closing
+  closing --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | One line: the question under study. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
+| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]` — one hypothesis per gap. | `asta-preview:find-literature` |
+| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
+| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Get the verdict from DataVoyager (`asta analyze-data submit`), framed on the hypothesis with the gathered data. It must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
+| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
+
+The `hypothesis` tasks are filled and closed at creation from the literature gaps — see plan.md.
diff --git a/plugins/asta/skills/research-step/workflows/execute.md b/plugins/asta/skills/research-step/workflows/execute.md
index 5fba9ea..61bebc7 100644
--- a/plugins/asta/skills/research-step/workflows/execute.md
+++ b/plugins/asta/skills/research-step/workflows/execute.md
@@ -1,6 +1,6 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
 
 ## Preconditions
 
@@ -9,22 +9,17 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). Hypothesis tasks are normally auto-resolved at creation by **plan**, so they should not appear here. If one does, it means the gap text was too thin for plan to fill the output without inventing content — flag this to the user and ask whether to refine the source `literature_review` first.
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce a JSON object matching the schema. For schema fields ending in `_path`, write the file to disk first and put the relative path in the JSON.
-6. **Validate structurally.** Run `scripts/validate-output.sh <task_type> <metadata-json-file>`. It checks the envelope (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks for the high-leverage cases (e.g., `analysis.verdict` enum, `analysis.confidence` range). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Materialize the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan or update-summary.** Some closed task types unlock new graph structure; others don't. Decide based on the closed task's `task_type`:
-
-   | Closed task_type | Hand off to |
-   |---|---|
-   | `literature_review`, `hypothesis`, `analysis`, `synthesis` | **plan** (with this issue as the source). `plan` then chains to **update-summary**. Note: `hypothesis` only reaches this branch in the rare case it was left open at creation; the normal path is plan→auto-resolve. |
-   | `scope`, `definitions`, `experiment_design`, `evidence_gathering` | **update-summary** directly. |
+5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
 
-   Either path ends with `summary.md` rebuilt.
+   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
+6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so the `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the wrapper and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
+7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
+8. **Close.** `bd close <id>`.
+9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
 
 ## Notes on output files
 
diff --git a/plugins/asta/skills/research-step/workflows/init.md b/plugins/asta/skills/research-step/workflows/init.md
index fd11be3..4df19c0 100644
--- a/plugins/asta/skills/research-step/workflows/init.md
+++ b/plugins/asta/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
+After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
 
 ## Preconditions
 
diff --git a/plugins/asta/skills/research-step/workflows/plan.md b/plugins/asta/skills/research-step/workflows/plan.md
index c5ffb2d..e0a158d 100644
--- a/plugins/asta/skills/research-step/workflows/plan.md
+++ b/plugins/asta/skills/research-step/workflows/plan.md
@@ -1,99 +1,61 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier (scope, definitions, literature_review) from `mission.md`.
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
+- **replan** — an epic exists. Add the next tasks after one closes.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized — else run **init**.
+- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing, send the user to **brainstorm**.
+- **replan**: `scripts/epic-root.sh` says `status: found`. If a source task was passed in (usually by **execute**), it must be closed and have a populated `metadata.research_step.output`.
 
-## Issue metadata convention
-
-Every task issue carries:
-
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
-
-The mission epic additionally carries `epic_root: true`.
+Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
 
 ## Mode selection
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
 
-## Bootstrap mode
+## Bootstrap
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
+1. Read `mission.md`. If it is missing or empty, abort and send the user to **brainstorm**.
+2. Create the epic:
    ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. **Create the initial frontier.** Three `task` issues with the metadata convention above:
-   - `scope: <one-line>` — `inputs: []`
-   - `definitions: <one-line>` — `inputs: [<scope-id>]`
-   - `literature_review: <one-line>` — `inputs: [<scope-id>, <definitions-id>]`
-4. **Add edges.**
-   - `parent-child` from each frontier task to the epic
-   - `blocks`: scope → definitions; scope → literature_review; definitions → literature_review
-5. **Report.** Print the epic ID and the three task IDs.
-
-## Replan mode
-
-Read the source task's task_type and output:
-
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
+4. Add edges: `parent-child` from each new task to the epic, and a `blocks` edge from every task listed in another task's `inputs` to the task that reads it.
 
-Apply this table:
+## Replan
 
-| Source task_type | Action |
-|---|---|
-| `literature_review` | For each gap in `output.gaps`, create a `hypothesis` task with `inputs: [<scope-id>, <source-id>]`. Edges: `parent-child` to epic; `blocks` from the source. **Populate `metadata.research_step.output` at creation time** (see below) and close the issue immediately — the gap text already contains the statement, rationale, and prediction in prose, so there is no separate `execute` pass for hypotheses. |
-| `hypothesis` | Create the chain `experiment_design` → `evidence_gathering` → `analysis`, each `blocks` the next. `experiment_design` depends on the hypothesis (via `inputs`); `analysis` depends on both the hypothesis and the new `evidence_gathering`. All three get `parent-child` to the epic. |
-| `analysis` | If every `hypothesis` in the epic now has a closed `analysis`, create one `synthesis` task with `inputs` listing all analysis IDs and the scope ID. `parent-child` to epic; `blocks` from each analysis. Otherwise no-op. |
-| `synthesis` | If `output.open_questions` is non-empty, **stop and ask the user** before creating new `hypothesis` tasks. If approved, create them with a `discovered-from` edge back to the synthesis (in addition to the usual edges). |
-| `scope`, `definitions`, `experiment_design`, `evidence_gathering` | No replan. Report no-op and stop. |
+The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
+- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
+- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
+- **Hypotheses** are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until every task on the frontier needs an execute pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
 
-### Auto-resolving hypothesis tasks
+### Filling in hypotheses
 
-When creating a `hypothesis` from a literature_review gap:
+A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
 
-1. Derive the four output fields directly from the gap text and surrounding `literature_review` output (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why this gap implies the claim
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Validate with `scripts/validate-output.sh hypothesis <metadata-json-file>` before persisting.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
+2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
+3. Write `output.json` and `output.md` (the readable hypothesis; link any AutoDS finding rather than writing a bare `node_x_y`).
+4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
+5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+If a gap or finding is too thin to fill honestly, leave the hypothesis open for a real `execute` pass instead.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**.
 
-## Out of scope
+## Not here
 
-- Running tasks or producing outputs. That belongs to **execute**.
-- Environment setup (installing `bd`/`jq`, `bd init`). That belongs to **init**.
-- Editing `mission.md`. That belongs to **brainstorm**.
-- Validating output quality.
+Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
diff --git a/skills/research-step/SKILL.md b/skills/research-step/SKILL.md
index 0d2fcee..0181287 100644
--- a/skills/research-step/SKILL.md
+++ b/skills/research-step/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
@@ -31,12 +31,41 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
+## Plan templates
+
+A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
+
+- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
+- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
+- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
+- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that and block it on each. (`schemas.yaml` is output shape only — no wiring.)
+- Don't add tasks the template doesn't have.
+
+Available templates:
+
+| Name | Purpose |
+|---|---|
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
+
+### Task outputs
+
+Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
+
+| Path | Role |
+|---|---|
+| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
+| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
+| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
+
+Cross-task references in `output.json` use the run-root-relative path (`.asta/tasks/<bd-id>/artifacts/<file>`); inside `output.md`, use the file-relative link form so the page renders standalone.
+
 ## Routing
 
 ### 1. Honor explicit requests
@@ -51,7 +80,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → if the closed task type is `literature_review`, `hypothesis`, `analysis`, or `synthesis`, chain to **plan** (which chains to **update-summary**); otherwise chain directly to **update-summary**.
+- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/skills/research-step/assets/report_example.tex b/skills/research-step/assets/report_example.tex
new file mode 100644
index 0000000..e87ebf5
--- /dev/null
+++ b/skills/research-step/assets/report_example.tex
@@ -0,0 +1,620 @@
+% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
+%
+% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
+% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
+%
+% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
+% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
+% your run embeds its own figures from `artifacts/`.
+
+\documentclass[11pt]{article}
+\usepackage[margin=1in]{geometry}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{hyperref}
+\usepackage{booktabs}
+\usepackage{longtable}
+\usepackage{array}
+\usepackage{enumitem}
+\usepackage{xcolor}
+\usepackage{microtype}
+\usepackage{graphicx}
+\usepackage{titling}
+\usepackage{fancyhdr}
+\usepackage{titlesec}
+\usepackage{tabularx}
+\usepackage{tikz}
+\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
+\definecolor{paperfinderpurple}{HTML}{6D28D9}
+
+\hypersetup{
+  colorlinks=true,
+  linkcolor=blue!55!black,
+  urlcolor=blue!55!black,
+  citecolor=blue!55!black,
+}
+
+\pagestyle{fancy}
+\fancyhf{}
+\fancyhead[L]{Multi-Agent Computational Investigation}
+\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
+\fancyfoot[C]{\thepage}
+\renewcommand{\headrulewidth}{0.4pt}
+
+\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
+\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
+
+\setlength{\parskip}{0.5em}
+
+\begin{document}
+
+\begin{titlepage}
+\thispagestyle{empty}
+\vspace*{0.6in}
+\begin{center}
+{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
+\end{center}
+
+\vspace*{40pt}
+
+\noindent\makebox[\textwidth][c]{%
+\begin{tikzpicture}[
+  font=\footnotesize,
+  procbox/.style={
+    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  agentbox/.style={
+    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  paperbox/.style={
+    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  finalbox/.style={
+    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
+    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
+  },
+  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
+  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
+  node distance=0.45cm and 0.45cm,
+]
+% Band 1: discovery phase, left-to-right
+\node[procbox] (scope) {Scope \&\\Definitions};
+\node[procbox, right=of scope] (prov) {Data\\Provenance};
+\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
+\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
+\node[procbox, right=of laws] (themes) {Cluster\\Themes};
+
+% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
+\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
+\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
+\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
+\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
+\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
+
+\begin{scope}[on background layer]
+\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
+\end{scope}
+\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
+
+% Band 3: synthesis + follow-on, left-to-right
+\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
+\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
+\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
+\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
+\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
+
+\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
+
+% Band 1 arrows
+\draw[arr] (scope) -- (prov);
+\draw[arr] (prov) -- (ad);
+\draw[arr] (ad) -- (laws);
+\draw[arr] (laws) -- (themes);
+
+% Band 1 -> Band 2: straight down themes -> lit
+\draw[arr] (themes) -- (lit);
+
+% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
+\draw[arr] (lit) -- (hyp);
+\draw[arr] (hyp) -- (evid);
+\draw[arr] (evid) -- (exp);
+\draw[arr] (exp) -- (rep);
+
+% Retry self-loop on rep (black so it reads clearly)
+\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
+
+% Band 2 -> Band 3: straight down rep -> across
+\draw[arr] (rep) -- (across);
+
+% Band 3 arrows
+\draw[arr] (across) -- (theo);
+\draw[arr] (theo) -- (nov);
+\draw[arr] (nov) -- (aed);
+\draw[arr] (aed) -- (dv2);
+
+% Band 3 -> final report
+\draw[arr] (dv2) -- (rep_final);
+
+% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
+\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
+\end{tikzpicture}%
+}
+
+\vspace*{\fill}
+\begin{center}
+\footnotesize\itshape\color{gray!50!black}
+Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
+\end{center}
+\end{titlepage}
+
+\tableofcontents
+\newpage
+
+%---------------------------------------------------------------
+\section{Mission}
+
+This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
+
+We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
+
+The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
+
+%---------------------------------------------------------------
+\section{Abstract}
+
+We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
+
+\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
+\begin{itemize}[noitemsep]
+\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
+\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
+\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
+\end{itemize}
+
+\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
+
+\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
+
+%---------------------------------------------------------------
+\section{Background and Motivation}
+
+\subsection{The Pakistan WPV1 resurgence}
+
+Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
+\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
+\label{fig:national}
+\end{figure}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
+\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
+\label{fig:district}
+\end{figure}
+
+\subsection{The older-cohort hypothesis}
+
+Three lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
+
+\subsection{Prior AutoDiscovery findings}
+
+Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) were curated into six candidate cross-cutting laws:
+\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
+\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
+\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
+\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
+\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
+\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
+\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
+\end{description}
+
+These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
+
+%---------------------------------------------------------------
+\section{Methods}
+\label{sec:methods}
+
+\subsection{Data sources}
+
+The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
+
+\begin{itemize}[noitemsep]
+\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
+\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
+\item Pakistan Social and Living Standards Measurement Survey (PSLM) 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
+\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
+\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
+\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
+\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
+\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
+\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
+\end{itemize}
+
+\subsection{Computational agents and their roles}
+
+\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
+
+\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
+
+\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. It includes a calibrated novelty assessment that produces law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
+
+\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
+
+\subsection{AutoDiscovery curation and replication design}
+\label{sec:methods_ad}
+
+The four AD runs together completed 121 successful experiments. We curated cross-cutting findings at $|$surprisal$|$ $\geq 0.27$ or the system's intrinsic surprise flag set to true, grouping into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal magnitude of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
+
+For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
+
+\subsection{Cross-source robustness experiments}
+
+Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
+
+\subsection{Theorizer runs}
+
+Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
+
+\subsection{AutoExperimentDesigner follow-on protocols}
+
+After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
+
+For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
+
+\subsection{Statistical procedures}
+
+\begin{itemize}[noitemsep]
+\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
+\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
+\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
+\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
+\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Results}
+
+The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
+\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
+\label{fig:matrix}
+\end{figure}
+
+\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
+
+The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
+
+The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
+E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
+E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L1.}
+\end{table}
+
+\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
+
+\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
+
+L2 was tested in three independent ways and was refuted in all three.
+
+In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
+
+Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
+\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} Districts with at least one case of either WPV1 or cVDPV2. Marker size encodes total case volume; color encodes the same. The downward OLS slope indicates cVDPV2 (low ratio) concentrates in adult-heavy districts.}
+\label{fig:subtype}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
+E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
+E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
+E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L2.}
+\end{table}
+
+\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
+
+\subsection{Law L3 --- Two-regime household contact intensity}
+
+L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient is significant at $p{=}0.0006$ with the sign indicating the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produces $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis are recovered.
+
+\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
+
+\subsection{Law L4 --- Cross-border mobility mechanism}
+
+L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is strongly supported by three independent experiments:
+
+\begin{itemize}[noitemsep]
+\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
+\item Experiment E13 fit a district-year Poisson with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant at coefficient 1.75, $p{=}0.079$.
+\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
+\end{itemize}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
+\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan after which cross-border coupling intensified per Experiment E11.}
+\label{fig:pakafg}
+\end{figure}
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Test & Result & Verdict \\
+\midrule
+E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
+E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
+E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
+E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
+\bottomrule
+\end{tabular}
+\caption{Experiments testing Law L4.}
+\end{table}
+
+\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
+
+\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
+
+L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model failed to reject the Pol3-only model ($p{=}0.383$).
+
+The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
+
+\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
+
+%---------------------------------------------------------------
+\section{Pre-Registered Confirmatory Test of the Combined Theory}
+\label{sec:final}
+
+\subsection{Background and rationale}
+
+The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
+
+This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
+
+\subsection{AutoExperimentDesigner pre-registered protocol}
+
+The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
+
+\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the first year in which Pol3 crosses 80\%.
+
+\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
+
+\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
+
+\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
+
+\subsection{DataVoyager execution}
+
+The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
+
+A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
+
+\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
+
+The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
+
+\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
+
+In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The ratio $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
+
+The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at $c{=}85$\%. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
+
+\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
+
+At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
+
+\subsection{Combined verdict}
+
+All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
+
+\begin{table}[h]
+\centering
+\small
+\begin{tabular}{lll}
+\toprule
+Component & Key statistic & Status \\
+\midrule
+P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
+P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
+P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
+Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
+\bottomrule
+\end{tabular}
+\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
+\end{table}
+
+%---------------------------------------------------------------
+\section{Trustworthiness Analysis}
+
+\subsection{What we can trust}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
+\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
+\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
+\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
+\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
+\end{itemize}
+
+\subsection{Key limitations}
+
+\begin{itemize}[noitemsep]
+\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
+\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
+\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
+\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
+\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
+\item \textbf{Theorizer corpus selection.} The PaperFinder corpus was heavily weighted toward programmatic reports and Pakistan-Afghanistan phylogenetic studies; the foundational immunological papers on mucosal immunity were under-represented. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
+\end{itemize}
+
+\subsection{Deviations from protocol}
+
+\begin{itemize}[noitemsep]
+\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
+\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
+\end{itemize}
+
+%---------------------------------------------------------------
+\section{Conclusions}
+
+\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
+
+\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
+
+\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
+
+\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is approximately 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
+
+\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
+
+%---------------------------------------------------------------
+\section{Future Directions}
+
+\begin{enumerate}[noitemsep]
+\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
+\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
+\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
+\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
+\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
+\end{enumerate}
+
+%---------------------------------------------------------------
+\appendix
+
+\section{Computational Experiment Catalogue}
+\label{app:experiments}
+
+This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
+
+\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
+
+\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
+
+\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
+
+\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level (n=5), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
+
+\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
+
+\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15-49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was statistically insignificant. Verdict: inconclusive (underpowered).
+
+\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
+
+\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log\bigl((\text{ES}+0.5)/(\text{cases}+0.5)\bigr)$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
+
+\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
+
+\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
+
+\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
+
+\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
+
+\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
+
+\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
+
+\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
+
+\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
+
+\section{Datasets}
+\label{app:datasets}
+
+\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
+\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
+
+\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
+
+\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
+
+\item[D4.] \textbf{Pakistan Standards of Living Measurement Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
+
+\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
+
+\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
+
+\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
+
+\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
+
+\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
+
+\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
+\end{description}
+
+\section{References}
+
+The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
+
+\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
+\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
+
+\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
+
+\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
+
+\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
+
+\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
+
+\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
+
+\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
+
+\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
+
+\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
+
+\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
+
+\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
+
+\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
+
+\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
+
+\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
+
+\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
+
+\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
+
+\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
+\end{description}
+
+\end{document}
diff --git a/skills/research-step/assets/schemas.yaml b/skills/research-step/assets/schemas.yaml
index b840628..888db1b 100644
--- a/skills/research-step/assets/schemas.yaml
+++ b/skills/research-step/assets/schemas.yaml
@@ -1,20 +1,18 @@
-# Output schemas for research-step task types.
-# Each task issue stores its realized output at metadata.research_step.output,
-# matching the shape under `output:` for its task_type.
+# Output shapes for research-step tasks. Each task stores its output at
+# metadata.research_step.output, matching the shape under `output:` for its type.
+# Wiring (which task feeds which) lives in the templates, not here.
 
 schema_version: 1
 
 task_types:
 
   scope:
-    inputs: []
     output:
       question: string                   # the precise research question
       boundaries: [string]               # what is in / out of scope
       success_criteria: [string]         # how we know we have answered it
 
   definitions:
-    inputs: [scope]
     output:
       terms:
         - name: string
@@ -22,7 +20,6 @@ task_types:
           rationale: string
 
   literature_review:
-    inputs: [scope, definitions]
     output:
       summary_path: string               # relative path; long-form context
       key_findings: [string]             # 3-10 bullets readable without opening summary_path
@@ -34,7 +31,6 @@ task_types:
           relevance: string
 
   hypothesis:
-    inputs: [scope, literature_review]
     output:
       statement: string                  # H_n: ...
       rationale: string
@@ -42,7 +38,6 @@ task_types:
       expected_evidence: [string]
 
   experiment_design:
-    inputs: [hypothesis]
     output:
       method: string
       procedure: [string]                # ordered steps
@@ -53,7 +48,6 @@ task_types:
       artifacts_expected: [string]       # paths the gathering step will produce
 
   evidence_gathering:
-    inputs: [experiment_design]
     output:
       artifacts:
         - path: string
@@ -62,8 +56,17 @@ task_types:
       log_path: string                   # what was actually run
       deviations: [string]               # ways execution diverged from design
 
+  auto_discovery:
+    output:
+      runid: string                      # the AutoDS run (created or imported)
+      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
+      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
+      surprising_nodes:
+        - id: string                     # e.g. node_3_0
+          surprise: number
+          finding: string
+
   analysis:
-    inputs: [hypothesis, evidence_gathering]
     output:
       verdict: enum [supported, refuted, inconclusive]
       confidence: number                 # 0.0 - 1.0
@@ -71,7 +74,6 @@ task_types:
       caveats: [string]
 
   synthesis:
-    inputs: [scope, analysis_*]          # all analysis issues in the epic
     output:
       answer: string                     # answer to scope.question
       supporting_hypotheses: [bd_id]
diff --git a/skills/research-step/assets/theorizer_mission_example.md b/skills/research-step/assets/theorizer_mission_example.md
new file mode 100644
index 0000000..acaa800
--- /dev/null
+++ b/skills/research-step/assets/theorizer_mission_example.md
@@ -0,0 +1,78 @@
+# Example theorizer mission statement
+
+This is a worked example of the **mission statement** passed to the theorizer in the
+`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
+run's `mission.md`; it is the prompt the theorizer receives once the per-theme
+reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
+and the per-theme findings.
+
+A well-formed theorizer mission does five things, and this example shows all five:
+
+1. **States the question** in one sentence, naming the phenomenon and the population of interest.
+2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
+3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
+4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
+5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
+
+Tagging each finding/question/constraint with its supporting experiment is what keeps
+the returned theories anchorable: downstream, `theorizer_theories` drops any theory
+without ≥1 law anchor, and this structure makes the anchor explicit.
+
+---
+
+```
+Mission: Generate theories that explain the role of populations aged 5+ years in
+Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
+findings and the open questions they leave unresolved.
+
+SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
+  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
+      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
+  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
+      strengthening significantly after 2021 (X2).
+  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
+      by under-5 population share, with under-5 share dominating 15-64 working-age
+      share (T2 retry-1).
+  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
+      dominates in adult-heavy districts (X4, p<0.001).
+  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
+      scale (T5 retry-0/1).
+  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
+      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
+      (X7).
+
+OPEN QUESTIONS (theories should address at least one):
+  Q1. What replaced national Pol3 coverage as the dominant transmission lever
+      after 2018-2019?
+  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
+      drives the case coupling intensification?
+  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
+      in young districts) appear?
+  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
+      they are NOT the dominant district-level predictor but ARE plausibly the
+      operative mobility vectors?
+
+CONSTRAINTS (refuted framings to avoid):
+  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
+      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
+  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
+      refuted at district by T2, at province by T2 retry-4, on silent-transmission
+      signature by X3, and on subtype contrast by X4.
+  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
+      by T5 retry-0/1.
+  C4. Theories centered on resident Afghan refugee populations as a static mobility
+      channel — refuted by X7.
+
+REWARDED FRAMINGS:
+  R1. Theories that explain the 2018-2019 break date in terms of immunological,
+      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
+  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
+      deportations, seasonal transit) consistent with the post-2021 intensification.
+  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
+      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
+      while WPV1 retains a pediatric profile.
+  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
+      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
+  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
+      (mobility) — the two laws DataVoyager reproduced.
+```
diff --git a/skills/research-step/scripts/validate-output.sh b/skills/research-step/scripts/validate-output.sh
index 0f5a84e..7523283 100755
--- a/skills/research-step/scripts/validate-output.sh
+++ b/skills/research-step/scripts/validate-output.sh
@@ -1,14 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh — structural validation of a research_step output JSON.
 #
-# Usage: validate-output.sh <task_type> <metadata-json-file>
+# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the canonical metadata envelope
+#   2. carries the metadata wrapper
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
+# If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
+# checks on its output.md: present, non-empty, has links, no unlinked entities.
 #
 # Exit codes:
 #   0  — valid
@@ -16,18 +18,27 @@
 #   3  — unknown task_type
 #   4  — missing required field
 #   5  — task_type mismatch with envelope
+#   6  — required output.md missing (only when [task-dir] supplied)
+#   7  — output.md empty or a stub (only when [task-dir] supplied)
+#   8  — output.md has no markdown links (only when [task-dir] supplied)
+#   9  — a named entity is unlinked (only when [task-dir] supplied)
+#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
+#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
+#           <3 embedded figures (14), a required section is missing (15)
 #
-# This is structural validation only. Quality validation (sound prediction,
-# sane confidence, valid citations) is out of scope per execute.md.
+# Structural checks only — required fields, working links, and the report's basic
+# pieces. It can't tell whether the science is sound or the writing is good; that's
+# the agent's job.
 set -euo pipefail
 
-if [[ $# -ne 2 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file>" >&2
+if [[ $# -lt 2 || $# -gt 3 ]]; then
+  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
   exit 1
 fi
 
 task_type="$1"
 file="$2"
+task_dir="${3:-}"
 
 if ! jq -e . "$file" > /dev/null 2>&1; then
   echo "validate-output: $file is not valid JSON" >&2
@@ -42,16 +53,17 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
+  auto_discovery)     required="runid status experiments_path" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
     echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis" >&2
+    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
     exit 3
     ;;
 esac
 
-# Envelope must carry the matching task_type so we don't validate scope JSON
+# The wrapper must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -63,7 +75,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Envelope shape sanity.
+# Wrapper shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -99,4 +111,58 @@ case "$task_type" in
     ;;
 esac
 
+# output.md document-quality gate (runs only when [task-dir] was supplied): the
+# task's output.md must exist, be more than a stub, and link the entities it names.
+if [[ -n "$task_dir" ]]; then
+  md="$task_dir/output.md"
+  if [[ ! -f "$md" ]]; then
+    echo "validate-output: required output.md not found at '$md'" >&2
+    exit 6
+  fi
+  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
+    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
+    exit 7
+  fi
+  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
+    echo "validate-output: output.md has no markdown links" >&2
+    exit 8
+  fi
+  # Skip fenced code, strip links/refs, then flag entities left bare in output.md / report.tex.
+  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
+    [[ -f "$f" ]] && perl -ne '
+      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
+      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
+      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
+    ' "$f"
+  done) || true  # loop status is non-zero when the last candidate file is absent; keep set -e from aborting
+  if [[ -n "$unlinked" ]]; then
+    echo "$unlinked" >&2
+    echo "validate-output: named entities above are unlinked" >&2
+    exit 9
+  fi
+
+  # Report gate. Applies only when artifacts/report.tex exists; each check below
+  # exits with its own code (10-15) and points the author at report_example.tex.
+  rpt="$task_dir/artifacts/report.tex"
+  if [[ -f "$rpt" ]]; then
+    ref="assets/report_example.tex"
+    rfail() {
+      echo "report-gate: $1" >&2
+      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
+      echo "     its depth and citation density before retrying." >&2
+      exit "$2"
+    }
+    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
+    # Two -e patterns instead of BRE '\|', which POSIX grep does not define.
+    grep -q -e '\\begin{tikzpicture}' -e '\\includegraphics' <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
+      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
+    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
+    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
+    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
+    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
+      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
+    done
+  fi
+fi
+
 echo "ok"
diff --git a/skills/research-step/templates/data_driven_theory_generation.md b/skills/research-step/templates/data_driven_theory_generation.md
new file mode 100644
index 0000000..756635c
--- /dev/null
+++ b/skills/research-step/templates/data_driven_theory_generation.md
@@ -0,0 +1,118 @@
+---
+name: data_driven_theory_generation
+description: |
+  See which of an AutoDS run's most surprising findings hold up on independent
+  data, then build theories on the ones that do and test the best with a new experiment.
+---
+
+# Data-driven theory generation
+
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  data_provenance["Data provenance"]
+  definitions --> data_provenance
+  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
+  data_provenance --> auto_discovery
+  subgraph sub1["for each of the 10 surprising findings"]
+    direction TB
+    hypothesis["Restate finding"]
+    literature_review["Literature search"]
+    experiment_design["Pre-register test"]
+    evidence_gathering["Find independent data"]
+    analysis["Replicate"]
+    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
+    analysis -- "retry: inconclusive → re-spec" --> experiment_design
+    analysis -- "retry: bad data → re-locate" --> evidence_gathering
+  end
+  auto_discovery --> hypothesis
+  replication_synthesis["Replication summary (k of 10, by mechanism)"]
+  analysis --> replication_synthesis
+  theorizer_theories["Theorizer-grounded theories"]
+  replication_synthesis --> theorizer_theories
+  novelty["Score theories for novelty"]
+  theorizer_theories --> novelty
+  subgraph sub2["for each of the top 3 theories"]
+    direction TB
+    followon_exp_design["Pre-register experiment (AED)"]
+    followon_evidence["Find new data"]
+    followon_analysis["Run, or leave as a proposal"]
+    followon_exp_design --> followon_evidence --> followon_analysis
+  end
+  novelty --> followon_exp_design
+  report["Closing report"]
+  followon_analysis --> report
+  report --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
+| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
+| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
+| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
+| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
+| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked as the weakest tier. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
+| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once, passing it a mission statement: the question plus a statement of which findings replicated (see the [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
+| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
+| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
+| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+
+The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
+
+## Running DataVoyager
+
+Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only mark data as `data_unavailable` once the ledger shows that the attempt to trace it to a public source failed — a 403/404 on someone else's resource isn't proof — then leave that `analysis` blocked, not closed.
+
+A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim:
+
+| What DataVoyager did | Go back to | Fix |
+|---|---|---|
+| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
+| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
+| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
+
+## mission.md
+
+- `run_pointer:` — the AutoDS run to import (omit to create one).
+- `datasets[]` — input dataset URIs for a new run.
+- A focus statement in the body — the question under study.
+
+Unless the user explicitly says to use local inputs only, fetch external public data for replication.
+
+## Writing the report and outputs
+
+These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
+
+- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
+- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
+- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text such as a raw `node_3_0`:
+
+  | thing | link to |
+  |---|---|
+  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
+  | paper | the asta document, paper URL, or `data_provenance` entry |
+  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
+  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
+  | dataset | the file under `inputs/`, or the Datasets appendix |
+  | experiment E-number | its appendix entry |
+
+- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
+- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
+- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/skills/research-step/templates/hypothesis_driven_research.md b/skills/research-step/templates/hypothesis_driven_research.md
new file mode 100644
index 0000000..eb3c847
--- /dev/null
+++ b/skills/research-step/templates/hypothesis_driven_research.md
@@ -0,0 +1,50 @@
+---
+name: hypothesis_driven_research
+description: |
+  Literature-grounded hypothesis generation. Survey the literature, raise a
+  hypothesis per gap, test each, and write a closing report.
+---
+
+# Hypothesis-driven research
+
+Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
+
+## Flow
+
+```mermaid
+flowchart TD
+  start([start])
+  scope["Scope"]
+  start --> scope
+  definitions["Definitions"]
+  scope --> definitions
+  lit_review["Literature review"]
+  definitions --> lit_review
+  subgraph sub1["for each gap"]
+    direction TB
+    hypothesis["Hypothesis"]
+    experiment_design["Experiment design"]
+    evidence_gathering["Evidence gathering"]
+    analysis["Analysis"]
+    hypothesis --> experiment_design --> evidence_gathering --> analysis
+  end
+  lit_review --> hypothesis
+  closing["Closing synthesis"]
+  analysis --> closing
+  closing --> stop([stop])
+```
+
+## Nodes
+
+| id | type | inputs | description | skills |
+|---|---|---|---|---|
+| `scope` | `scope` | — | One line: the question under study. | — |
+| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
+| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]`; each gap becomes one hypothesis. | `asta-preview:find-literature` |
+| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
+| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
+| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
+| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Submit the hypothesis and the gathered data to DataVoyager (`asta analyze-data submit`) and take the verdict from its result. The verdict must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
+| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
+
+The `hypothesis` tasks are not executed: **plan** fills and closes each one at creation from its literature gap — see `workflows/plan.md`.
diff --git a/skills/research-step/workflows/execute.md b/skills/research-step/workflows/execute.md
index 5fba9ea..61bebc7 100644
--- a/skills/research-step/workflows/execute.md
+++ b/skills/research-step/workflows/execute.md
@@ -1,6 +1,6 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
 
 ## Preconditions
 
@@ -9,22 +9,17 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). Hypothesis tasks are normally auto-resolved at creation by **plan**, so they should not appear here. If one does, it means the gap text was too thin for plan to fill the output without inventing content — flag this to the user and ask whether to refine the source `literature_review` first.
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce a JSON object matching the schema. For schema fields ending in `_path`, write the file to disk first and put the relative path in the JSON.
-6. **Validate structurally.** Run `scripts/validate-output.sh <task_type> <metadata-json-file>`. It checks the envelope (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks for the high-leverage cases (e.g., `analysis.verdict` enum, `analysis.confidence` range). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Materialize the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan or update-summary.** Some closed task types unlock new graph structure; others don't. Decide based on the closed task's `task_type`:
-
-   | Closed task_type | Hand off to |
-   |---|---|
-   | `literature_review`, `hypothesis`, `analysis`, `synthesis` | **plan** (with this issue as the source). `plan` then chains to **update-summary**. Note: `hypothesis` only reaches this branch in the rare case it was left open at creation; the normal path is plan→auto-resolve. |
-   | `scope`, `definitions`, `experiment_design`, `evidence_gathering` | **update-summary** directly. |
+5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
 
-   Either path ends with `summary.md` rebuilt.
+   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
+6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so the `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the wrapper and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
+7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
+8. **Close.** `bd close <id>`.
+9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
 
 ## Notes on output files
 
diff --git a/skills/research-step/workflows/init.md b/skills/research-step/workflows/init.md
index fd11be3..4df19c0 100644
--- a/skills/research-step/workflows/init.md
+++ b/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
+After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
 
 ## Preconditions
 
diff --git a/skills/research-step/workflows/plan.md b/skills/research-step/workflows/plan.md
index c5ffb2d..e0a158d 100644
--- a/skills/research-step/workflows/plan.md
+++ b/skills/research-step/workflows/plan.md
@@ -1,99 +1,61 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier (scope, definitions, literature_review) from `mission.md`.
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
+- **replan** — an epic exists. Add the next tasks after one closes.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized — else run **init**.
+- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing or empty, send the user to **brainstorm**.
+- **replan**: `scripts/epic-root.sh` says `status: found`. A source task passed in (usually by **execute**) is closed with a populated output.
 
-## Issue metadata convention
-
-Every task issue carries:
-
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
-
-The mission epic additionally carries `epic_root: true`.
+Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
 
 ## Mode selection
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
 
-## Bootstrap mode
+## Bootstrap
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
+1. Read `mission.md` (abort to **brainstorm** if missing or empty).
+2. Create the epic:
    ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. **Create the initial frontier.** Three `task` issues with the metadata convention above:
-   - `scope: <one-line>` — `inputs: []`
-   - `definitions: <one-line>` — `inputs: [<scope-id>]`
-   - `literature_review: <one-line>` — `inputs: [<scope-id>, <definitions-id>]`
-4. **Add edges.**
-   - `parent-child` from each frontier task to the epic
-   - `blocks`: scope → definitions; scope → literature_review; definitions → literature_review
-5. **Report.** Print the epic ID and the three task IDs.
-
-## Replan mode
-
-Read the source task's task_type and output:
-
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
+4. Add edges: `parent-child` from each task to the epic, and a `blocks` edge from every task listed in a task's `inputs` to that task.
 
-Apply this table:
+## Replan
 
-| Source task_type | Action |
-|---|---|
-| `literature_review` | For each gap in `output.gaps`, create a `hypothesis` task with `inputs: [<scope-id>, <source-id>]`. Edges: `parent-child` to epic; `blocks` from the source. **Populate `metadata.research_step.output` at creation time** (see below) and close the issue immediately — the gap text already contains the statement, rationale, and prediction in prose, so there is no separate `execute` pass for hypotheses. |
-| `hypothesis` | Create the chain `experiment_design` → `evidence_gathering` → `analysis`, each `blocks` the next. `experiment_design` depends on the hypothesis (via `inputs`); `analysis` depends on both the hypothesis and the new `evidence_gathering`. All three get `parent-child` to the epic. |
-| `analysis` | If every `hypothesis` in the epic now has a closed `analysis`, create one `synthesis` task with `inputs` listing all analysis IDs and the scope ID. `parent-child` to epic; `blocks` from each analysis. Otherwise no-op. |
-| `synthesis` | If `output.open_questions` is non-empty, **stop and ask the user** before creating new `hypothesis` tasks. If approved, create them with a `discovered-from` edge back to the synthesis (in addition to the usual edges). |
-| `scope`, `definitions`, `experiment_design`, `evidence_gathering` | No replan. Report no-op and stop. |
+The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
+- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
+- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
+- **Hypotheses** that just restate a gap or finding are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until the frontier is tasks that need an execute pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
 
-### Auto-resolving hypothesis tasks
+### Filling in hypotheses
 
-When creating a `hypothesis` from a literature_review gap:
+A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
 
-1. Derive the four output fields directly from the gap text and surrounding `literature_review` output (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why this gap implies the claim
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Validate with `scripts/validate-output.sh hypothesis <metadata-json-file>` before persisting.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
+2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
+3. Write `output.json` and `output.md` (the readable hypothesis; link any law rather than writing a bare `node_x_y`).
+4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
+5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+If the source (gap or finding) is too thin to fill these fields without inventing content, leave the hypothesis open and flag it to the user; it then gets a real `execute` pass instead.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**.
 
-## Out of scope
+## Not here
 
-- Running tasks or producing outputs. That belongs to **execute**.
-- Environment setup (installing `bd`/`jq`, `bd init`). That belongs to **init**.
-- Editing `mission.md`. That belongs to **brainstorm**.
-- Validating output quality.
+Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
diff --git a/src/asta/analyze_data/poll.py b/src/asta/analyze_data/poll.py
index f2e0af5..9372a71 100644
--- a/src/asta/analyze_data/poll.py
+++ b/src/asta/analyze_data/poll.py
@@ -4,29 +4,25 @@
 analyze-data skill body. Status ticks go to stderr; the final Task JSON
 goes to ``--output`` (or stdout) so the harness's background-task log
 shows progress without polluting the captured payload.
+
+Delegates the actual polling + rendering to the shared
+``asta_agent.a2a.commands._poll_until_terminal``, so step-progress
+events, parent/child indent, elapsed times, and artifact lines all
+surface here — the previous bespoke loop only emitted ``state=...``
+ticks (and one per poll, even when nothing changed).
 """
 
 from __future__ import annotations
 
-import time
-from datetime import datetime
+import json
 
 import click
-from a2a.types import Task, TaskState
-from asta_agent.a2a.client import A2AClient, A2AError
+from asta_agent.a2a.client import A2AClient
+from asta_agent.a2a.commands import _poll_until_terminal
 
 from asta.analyze_data._url import dv_url
 from asta.utils.auth_helper import get_access_token
 
-_TERMINAL_STATES = {
-    TaskState.completed,
-    TaskState.failed,
-    TaskState.input_required,
-    TaskState.canceled,
-    TaskState.rejected,
-    TaskState.auth_required,
-}
-
 
 @click.command()
 @click.argument("task_id")
@@ -39,44 +35,23 @@
 )
 @click.option(
     "--interval",
-    default=60,
-    show_default=True,
+    default=None,
     type=click.IntRange(min=1),
-    help="Seconds between polls.",
+    help="Seconds between polls. Omit for the SDK's adaptive cadence "
+         "(5×6 then 15×20 then 60s).",
 )
-def poll(task_id: str, output: str | None, interval: int) -> None:
+def poll(task_id: str, output: str | None, interval: int | None) -> None:
     """Poll TASK_ID until it reaches a terminal state, then emit the final Task JSON.
 
     Terminal states: completed, failed, input-required, canceled, rejected, auth-required.
-    Status ticks ([HH:MM:SS] state=...) are written to stderr; transient errors
-    are logged and retried.
+    Progress lines ([HH:MM:SS] state=…, step labels, artifacts) go to stderr;
+    the final Task JSON goes to --output (or stdout).
     """
     client = A2AClient(dv_url(), api_key=get_access_token())
-
-    while True:
-        ts = datetime.now().strftime("%H:%M:%S")
-        try:
-            result = client.get_task(task_id)
-            parsed = Task.model_validate(result)
-        except A2AError as e:
-            click.echo(f"[{ts}] error: {e.code} {e}", err=True)
-            time.sleep(interval)
-            continue
-        except Exception as e:
-            click.echo(f"[{ts}] error: {e}", err=True)
-            time.sleep(interval)
-            continue
-
-        state = parsed.status.state
-        click.echo(f"[{ts}] state={state.value}", err=True)
-
-        if state in _TERMINAL_STATES:
-            payload = parsed.model_dump_json(by_alias=True, indent=2, exclude_none=True)
-            if output:
-                with open(output, "w") as f:
-                    f.write(payload)
-            else:
-                click.echo(payload)
-            return
-
-        time.sleep(interval)
+    final = _poll_until_terminal(client, task_id, interval=interval)
+    payload = json.dumps(final, indent=2)
+    if output:
+        with open(output, "w") as f:
+            f.write(payload)
+    else:
+        click.echo(payload)
diff --git a/src/asta/auto_exp_designer.py b/src/asta/auto_exp_designer.py
new file mode 100644
index 0000000..21b35ce
--- /dev/null
+++ b/src/asta/auto_exp_designer.py
@@ -0,0 +1,20 @@
+from asta_agent.a2a.commands import make_a2a_group
+
+from asta.utils.auth_helper import get_access_token
+from asta.utils.config import get_api_config
+
+
+def _auto_exp_designer_url() -> str:
+    return get_api_config("auto-exp-designer")["base_url"]
+
+
+auto_exp_designer = make_a2a_group(
+    name="auto-exp-designer",
+    url_factory=_auto_exp_designer_url,
+    token_factory=get_access_token,
+    help=(
+        "Design computational experiments via the Auto Experiment Designer agent.\n\n"
+        "Subcommands talk to the agent through asta-gateway. Auth comes from\n"
+        "`asta auth login`."
+    ),
+)
diff --git a/src/asta/cli.py b/src/asta/cli.py
index 6e040e4..ca69e85 100644
--- a/src/asta/cli.py
+++ b/src/asta/cli.py
@@ -5,10 +5,12 @@
 
 from asta import __version__
 from asta.analyze_data import analyze_data
+from asta.auto_exp_designer import auto_exp_designer
 from asta.autodiscovery.commands import autodiscovery
 from asta.commands.auth import auth
 from asta.documents import documents
 from asta.experiment import experiment
+from asta.flows import flows
 from asta.literature.find import find
 from asta.literature.interactive import interactive
 from asta.papers.author import author
@@ -51,6 +53,9 @@ def papers():
 # Register analyze-data commands
 cli.add_command(analyze_data)
 
+# Register auto-exp-designer commands
+cli.add_command(auto_exp_designer)
+
 # Register artifacts command
 cli.add_command(artifacts, name="artifacts")
 
@@ -59,6 +64,7 @@ def papers():
 cli.add_command(experiment)
 cli.add_command(pdf_extraction)
 cli.add_command(autodiscovery)
+cli.add_command(flows)
 
 # Register literature subcommands
 literature.add_command(find)
diff --git a/src/asta/flows/__init__.py b/src/asta/flows/__init__.py
new file mode 100644
index 0000000..3159a79
--- /dev/null
+++ b/src/asta/flows/__init__.py
@@ -0,0 +1,5 @@
+"""Flows subcommand - pass-through to asta-flows CLI"""
+
+from .passthrough import flows
+
+__all__ = ["flows"]
diff --git a/src/asta/flows/passthrough.py b/src/asta/flows/passthrough.py
new file mode 100644
index 0000000..5d565b5
--- /dev/null
+++ b/src/asta/flows/passthrough.py
@@ -0,0 +1,15 @@
+"""Pass-through command for asta-flows CLI"""
+
+from asta.utils.config import get_config
+from asta.utils.passthrough import create_passthrough_command
+
+config = get_config()["passthrough"]["flows"]
+
+flows = create_passthrough_command(
+    tool_name=config["tool_name"],
+    install_type=config["install_type"],
+    install_source=config["install_source"],
+    minimum_version=config["minimum_version"],
+    command_name=config["command_name"],
+    docstring=config["docstring"],
+)
diff --git a/src/asta/utils/asta.conf b/src/asta/utils/asta.conf
index f0d6ae0..2b214e5 100644
--- a/src/asta/utils/asta.conf
+++ b/src/asta/utils/asta.conf
@@ -64,6 +64,11 @@ apis {
     base_url = ${auth.gateway_url}"/api/analyze-data"
     base_url = ${?ASTA_ANALYZE_DATA_URL}
   }
+
+  # Auto Experiment Designer A2A agent
+  auto-exp-designer {
+    base_url = ${auth.gateway_url}"/api/auto-exp-designer"
+  }
 }
 
 # Passthrough command configurations
@@ -106,4 +111,13 @@ passthrough {
     docstring = "Extract text from PDFs using olmOCR"
   }
 
+  flows {
+    tool_name = "asta-flows"
+    install_type = "local"
+    install_source = "~/workspace/asta-flows"
+    minimum_version = "0.1.0"
+    command_name = "flows"
+    docstring = "Live web UI for research-step runs"
+  }
+
 }

From a7d7f13b1c474cdff888f3fc3a034e24c45ab175 Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Tue, 2 Jun 2026 15:31:28 -0700
Subject: [PATCH 2/6] Cleaned up verbiage

---
 .../skills/research-step/SKILL.md             |  2 +-
 .../research-step/scripts/validate-output.sh  | 16 ++--
 .../data_driven_theory_generation.md          |  8 +-
 .../examples}/report_example.tex              |  0
 .../examples}/theorizer_mission_example.md    |  0
 .../skills/research-step/workflows/execute.md |  2 +-
 .../skills/research-step/workflows/plan.md    | 90 ++++++++++++-------
 plugins/asta/skills/research-step/SKILL.md    |  2 +-
 .../research-step/scripts/validate-output.sh  | 16 ++--
 .../data_driven_theory_generation.md          |  8 +-
 .../examples}/report_example.tex              |  0
 .../examples}/theorizer_mission_example.md    |  0
 .../skills/research-step/workflows/execute.md |  2 +-
 .../skills/research-step/workflows/plan.md    | 90 ++++++++++++-------
 skills/research-step/SKILL.md                 |  2 +-
 .../research-step/scripts/validate-output.sh  | 16 ++--
 .../data_driven_theory_generation.md          |  8 +-
 .../examples}/report_example.tex              |  0
 .../examples}/theorizer_mission_example.md    |  0
 skills/research-step/workflows/execute.md     |  2 +-
 skills/research-step/workflows/plan.md        | 90 ++++++++++++-------
 21 files changed, 216 insertions(+), 138 deletions(-)
 rename plugins/asta-preview/skills/research-step/{assets => templates/examples}/report_example.tex (100%)
 rename plugins/asta-preview/skills/research-step/{assets => templates/examples}/theorizer_mission_example.md (100%)
 rename plugins/asta/skills/research-step/{assets => templates/examples}/report_example.tex (100%)
 rename plugins/asta/skills/research-step/{assets => templates/examples}/theorizer_mission_example.md (100%)
 rename skills/research-step/{assets => templates/examples}/report_example.tex (100%)
 rename skills/research-step/{assets => templates/examples}/theorizer_mission_example.md (100%)

diff --git a/plugins/asta-preview/skills/research-step/SKILL.md b/plugins/asta-preview/skills/research-step/SKILL.md
index 0181287..3735bb5 100644
--- a/plugins/asta-preview/skills/research-step/SKILL.md
+++ b/plugins/asta-preview/skills/research-step/SKILL.md
@@ -51,7 +51,7 @@ Available templates:
 
 | Name | Purpose |
 |---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
 | `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
 
 ### Task outputs
diff --git a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
index 7523283..ab46d65 100755
--- a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
@@ -5,12 +5,12 @@
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the metadata wrapper
+#   2. carries the metadata envelope
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
 # If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md: present, non-empty, has links, no unlinked entities.
+# checks on its output.md.
 #
 # Exit codes:
 #   0  — valid
@@ -26,9 +26,7 @@
 #           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
 #           <3 embedded figures (14), a required section is missing (15)
 #
-# Structural checks only — required fields, working links, and the report's basic
-# pieces. It can't tell whether the science is sound or the writing is good; that's
-# the agent's job.
+# Structural checks only — required fields, working links, and the report's basic pieces.
 set -euo pipefail
 
 if [[ $# -lt 2 || $# -gt 3 ]]; then
@@ -53,7 +51,7 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path" ;;
+  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
@@ -63,7 +61,7 @@ case "$task_type" in
     ;;
 esac
 
-# The wrapper must carry the matching task_type so we don't validate scope JSON
+# The envelope must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -75,7 +73,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Wrapper shape.
+# Envelope shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -145,7 +143,7 @@ if [[ -n "$task_dir" ]]; then
   # check it has what report_example.tex has. Each failure points back to it.
   rpt="$task_dir/artifacts/report.tex"
   if [[ -f "$rpt" ]]; then
-    ref="assets/report_example.tex"
+    ref="templates/examples/report_example.tex"
     rfail() {
       echo "report-gate: $1" >&2
       echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
diff --git a/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
index 756635c..15a5875 100644
--- a/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
+++ b/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
@@ -2,12 +2,12 @@
 name: data_driven_theory_generation
 description: |
   See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the best with a new experiment.
+  data, then build theories on the ones that do and test the most promising with new experiments.
 ---
 
 # Data-driven theory generation
 
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
 
 ## Flow
 
@@ -67,12 +67,12 @@ flowchart TD
 | `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
 | `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
 | `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
 | `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
 | `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
 | `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
 | `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
 
 The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
 
diff --git a/plugins/asta-preview/skills/research-step/assets/report_example.tex b/plugins/asta-preview/skills/research-step/templates/examples/report_example.tex
similarity index 100%
rename from plugins/asta-preview/skills/research-step/assets/report_example.tex
rename to plugins/asta-preview/skills/research-step/templates/examples/report_example.tex
diff --git a/plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md b/plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md
similarity index 100%
rename from plugins/asta-preview/skills/research-step/assets/theorizer_mission_example.md
rename to plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md
diff --git a/plugins/asta-preview/skills/research-step/workflows/execute.md b/plugins/asta-preview/skills/research-step/workflows/execute.md
index 61bebc7..3d1a84f 100644
--- a/plugins/asta-preview/skills/research-step/workflows/execute.md
+++ b/plugins/asta-preview/skills/research-step/workflows/execute.md
@@ -9,7 +9,7 @@ Run one ready task end-to-end. Loads its schema, gathers its inputs, produces th
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user. (Hypothesis-typed tasks that run a skill, like `theorizer_theories` and `novelty`, are not restatements and do execute here.)
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
diff --git a/plugins/asta-preview/skills/research-step/workflows/plan.md b/plugins/asta-preview/skills/research-step/workflows/plan.md
index e0a158d..06ae941 100644
--- a/plugins/asta-preview/skills/research-step/workflows/plan.md
+++ b/plugins/asta-preview/skills/research-step/workflows/plan.md
@@ -1,60 +1,88 @@
 # Workflow: plan
 
-Create or extend the research graph. Two modes:
+Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
 
-- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
-- **replan** — an epic exists. Add the next tasks after one closes.
+- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
+- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
 
-Always chains to **update-summary** afterward.
+Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
 
 ## Preconditions
 
-- `bd` installed and `.beads/` initialized — else run **init**.
-- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing, send the user to **brainstorm**.
-- **replan**: `scripts/epic-root.sh` says `status: found`. A source task passed in (usually by **execute**) is closed with a populated output.
+- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
+- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
+- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
 
-Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
+## Issue metadata convention
+
+Every task issue carries:
+
+```json
+{
+  "research_step": {
+    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
+    "inputs": ["bd-xxxx", "bd-yyyy"],
+    "output_schema_version": 1,
+    "output": null
+  }
+}
+```
+
+The mission epic additionally carries `epic_root: true`.
 
 ## Mode selection
 
-Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
+1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
+2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
 
-## Bootstrap
+## Bootstrap mode
 
-1. Read `mission.md` (abort to **brainstorm** if missing).
-2. Create the epic:
+1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
+2. **Create the epic.**
    ```
-   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
-4. Add edges: `parent-child` to the epic, and `blocks` from each task named in another's `inputs`.
+3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
+4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
+5. **Report.** Print the epic ID and the created task IDs.
+
+## Replan mode
+
+Read the source task's task_type and output:
+
+```
+bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
+bd show <source-id> --json | jq '.[0].metadata.research_step.output'
+```
 
-## Replan
+Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
 
-The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
+- **Next step:** create the node(s) the diagram points to. Set each new task's `inputs` from its row, add a `blocks` edge from each input to the new task, and add a `parent-child` edge from the new task to the epic.
+- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
+- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
+- **Hypotheses** that restate a gap or finding are filled and closed at creation (see below), so also create the step that follows each one in the same pass — otherwise nothing is left open for `execute`. Keep creating whatever just unblocked until the frontier consists of tasks that need an `execute` pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
 
-- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
-- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
-- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until the frontier is tasks that need an execute pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
+If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
 
-### Filling in hypotheses
+### Auto-resolving hypothesis tasks
 
-A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
+A `hypothesis` created from a `literature_review` gap or an `auto_discovery` finding already has its claim stated, so it gets no separate `execute` pass, but it still produces `output.json` and `output.md` on disk like any task. Fill and close it at creation:
 
-1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
-2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
-3. Write `output.json` and `output.md` (the readable hypothesis; link any law rather than writing a bare `node_x_y`).
-4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
-5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
+1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
+   - `statement` — `H_n: <one-sentence claim>`
+   - `rationale` — why the source implies the claim (for a finding, cite its node id)
+   - `falsifiable_prediction` — what observation would refute it
+   - `expected_evidence` — list of concrete evidence types that would support it
+2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
+3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
 
-If a gap is too thin to fill honestly, leave the hypothesis open for a real `execute` pass instead.
+If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
 
 ## After either mode
 
-Hand off to **update-summary**.
+Hand off to **update-summary** so `summary.md` reflects the new state.
 
 ## Not here
 
diff --git a/plugins/asta/skills/research-step/SKILL.md b/plugins/asta/skills/research-step/SKILL.md
index 0181287..3735bb5 100644
--- a/plugins/asta/skills/research-step/SKILL.md
+++ b/plugins/asta/skills/research-step/SKILL.md
@@ -51,7 +51,7 @@ Available templates:
 
 | Name | Purpose |
 |---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
 | `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
 
 ### Task outputs
diff --git a/plugins/asta/skills/research-step/scripts/validate-output.sh b/plugins/asta/skills/research-step/scripts/validate-output.sh
index 7523283..ab46d65 100755
--- a/plugins/asta/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta/skills/research-step/scripts/validate-output.sh
@@ -5,12 +5,12 @@
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the metadata wrapper
+#   2. carries the metadata envelope
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
 # If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md: present, non-empty, has links, no unlinked entities.
+# checks on its output.md.
 #
 # Exit codes:
 #   0  — valid
@@ -26,9 +26,7 @@
 #           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
 #           <3 embedded figures (14), a required section is missing (15)
 #
-# Structural checks only — required fields, working links, and the report's basic
-# pieces. It can't tell whether the science is sound or the writing is good; that's
-# the agent's job.
+# Structural checks only — required fields, working links, and the report's basic pieces.
 set -euo pipefail
 
 if [[ $# -lt 2 || $# -gt 3 ]]; then
@@ -53,7 +51,7 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path" ;;
+  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
@@ -63,7 +61,7 @@ case "$task_type" in
     ;;
 esac
 
-# The wrapper must carry the matching task_type so we don't validate scope JSON
+# The envelope must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -75,7 +73,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Wrapper shape.
+# Envelope shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -145,7 +143,7 @@ if [[ -n "$task_dir" ]]; then
   # check it has what report_example.tex has. Each failure points back to it.
   rpt="$task_dir/artifacts/report.tex"
   if [[ -f "$rpt" ]]; then
-    ref="assets/report_example.tex"
+    ref="templates/examples/report_example.tex"
     rfail() {
       echo "report-gate: $1" >&2
       echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
diff --git a/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
index 756635c..15a5875 100644
--- a/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
+++ b/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
@@ -2,12 +2,12 @@
 name: data_driven_theory_generation
 description: |
   See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the best with a new experiment.
+  data, then build theories on the ones that do and test the most promising with new experiments.
 ---
 
 # Data-driven theory generation
 
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
 
 ## Flow
 
@@ -67,12 +67,12 @@ flowchart TD
 | `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
 | `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
 | `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
 | `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
 | `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
 | `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
 | `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
 
 The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
 
diff --git a/plugins/asta/skills/research-step/assets/report_example.tex b/plugins/asta/skills/research-step/templates/examples/report_example.tex
similarity index 100%
rename from plugins/asta/skills/research-step/assets/report_example.tex
rename to plugins/asta/skills/research-step/templates/examples/report_example.tex
diff --git a/plugins/asta/skills/research-step/assets/theorizer_mission_example.md b/plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md
similarity index 100%
rename from plugins/asta/skills/research-step/assets/theorizer_mission_example.md
rename to plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md
diff --git a/plugins/asta/skills/research-step/workflows/execute.md b/plugins/asta/skills/research-step/workflows/execute.md
index 61bebc7..3d1a84f 100644
--- a/plugins/asta/skills/research-step/workflows/execute.md
+++ b/plugins/asta/skills/research-step/workflows/execute.md
@@ -9,7 +9,7 @@ Run one ready task end-to-end. Loads its schema, gathers its inputs, produces th
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user. (Hypothesis-typed tasks that run a skill, like `theorizer_theories` and `novelty`, are not restatements and do execute here.)
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
diff --git a/plugins/asta/skills/research-step/workflows/plan.md b/plugins/asta/skills/research-step/workflows/plan.md
index e0a158d..06ae941 100644
--- a/plugins/asta/skills/research-step/workflows/plan.md
+++ b/plugins/asta/skills/research-step/workflows/plan.md
@@ -1,60 +1,88 @@
 # Workflow: plan
 
-Create or extend the research graph. Two modes:
+Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
 
-- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
-- **replan** — an epic exists. Add the next tasks after one closes.
+- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
+- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
 
-Always chains to **update-summary** afterward.
+Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
 
 ## Preconditions
 
-- `bd` installed and `.beads/` initialized — else run **init**.
-- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing, send the user to **brainstorm**.
-- **replan**: `scripts/epic-root.sh` says `status: found`. A source task passed in (usually by **execute**) is closed with a populated output.
+- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
+- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
+- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
 
-Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
+## Issue metadata convention
+
+Every task issue carries:
+
+```json
+{
+  "research_step": {
+    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
+    "inputs": ["bd-xxxx", "bd-yyyy"],
+    "output_schema_version": 1,
+    "output": null
+  }
+}
+```
+
+The mission epic additionally carries `epic_root: true`.
 
 ## Mode selection
 
-Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
+1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
+2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
 
-## Bootstrap
+## Bootstrap mode
 
-1. Read `mission.md` (abort to **brainstorm** if missing).
-2. Create the epic:
+1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
+2. **Create the epic.**
    ```
-   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
-4. Add edges: `parent-child` to the epic, and `blocks` from each task named in another's `inputs`.
+3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
+4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
+5. **Report.** Print the epic ID and the created task IDs.
+
+## Replan mode
+
+Read the source task's task_type and output:
+
+```
+bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
+bd show <source-id> --json | jq '.[0].metadata.research_step.output'
+```
 
-## Replan
+Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
 
-The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
+- **Next step:** create the node(s) the diagram points to. Set `inputs` from the row, add a `blocks` edge from each input, and add a `parent-child` edge to the epic.
+- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
+- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
+- **Hypotheses** are filled and closed at creation (see below), so also create the step that follows each one in the same pass — otherwise nothing is left open for `execute` to pick up. Keep creating whatever just became unblocked until every task on the frontier needs an `execute` pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
 
-- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
-- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
-- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until the frontier is tasks that need an execute pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
+If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
 
-### Filling in hypotheses
+### Auto-resolving hypothesis tasks
 
-A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
+When creating a `hypothesis` from a `literature_review` gap or an `auto_discovery` finding, its claim is already stated, so there is no separate `execute` pass. Fill and close it at creation as follows (it still produces `output.json` and `output.md` on disk like any task):
 
-1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
-2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
-3. Write `output.json` and `output.md` (the readable hypothesis; link any law rather than writing a bare `node_x_y`).
-4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
-5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
+1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
+   - `statement` — `H_n: <one-sentence claim>`
+   - `rationale` — why the source implies the claim (for a finding, cite its node id)
+   - `falsifiable_prediction` — what observation would refute it
+   - `expected_evidence` — list of concrete evidence types that would support it
+2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
+3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
 
-If a gap is too thin to fill honestly, leave the hypothesis open for a real `execute` pass instead.
+If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
 
 ## After either mode
 
-Hand off to **update-summary**.
+Hand off to **update-summary** so `summary.md` reflects the new state.
 
 ## Not here
 
diff --git a/skills/research-step/SKILL.md b/skills/research-step/SKILL.md
index 0181287..3735bb5 100644
--- a/skills/research-step/SKILL.md
+++ b/skills/research-step/SKILL.md
@@ -51,7 +51,7 @@ Available templates:
 
 | Name | Purpose |
 |---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the best with a new experiment. |
+| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
 | `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
 
 ### Task outputs
diff --git a/skills/research-step/scripts/validate-output.sh b/skills/research-step/scripts/validate-output.sh
index 7523283..ab46d65 100755
--- a/skills/research-step/scripts/validate-output.sh
+++ b/skills/research-step/scripts/validate-output.sh
@@ -5,12 +5,12 @@
 #
 # Verifies that the JSON file:
 #   1. parses
-#   2. carries the metadata wrapper
+#   2. carries the metadata envelope
 #      ({research_step: {task_type, inputs, output_schema_version, output}})
 #   3. has every required `output.<key>` for the given <task_type> per
 #      assets/schemas.yaml (schema_version: 1)
 # If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md: present, non-empty, has links, no unlinked entities.
+# checks on its output.md.
 #
 # Exit codes:
 #   0  — valid
@@ -26,9 +26,7 @@
 #           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
 #           <3 embedded figures (14), a required section is missing (15)
 #
-# Structural checks only — required fields, working links, and the report's basic
-# pieces. It can't tell whether the science is sound or the writing is good; that's
-# the agent's job.
+# Structural checks only — required fields, working links, and the report's basic pieces.
 set -euo pipefail
 
 if [[ $# -lt 2 || $# -gt 3 ]]; then
@@ -53,7 +51,7 @@ case "$task_type" in
   hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
   experiment_design)  required="method procedure variables artifacts_expected" ;;
   evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path" ;;
+  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
   analysis)           required="verdict confidence reasoning caveats" ;;
   synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
   *)
@@ -63,7 +61,7 @@ case "$task_type" in
     ;;
 esac
 
-# The wrapper must carry the matching task_type so we don't validate scope JSON
+# The envelope must carry the matching task_type so we don't validate scope JSON
 # against an analysis schema by accident.
 envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
 if [[ -z "$envelope_type" ]]; then
@@ -75,7 +73,7 @@ if [[ "$envelope_type" != "$task_type" ]]; then
   exit 5
 fi
 
-# Wrapper shape.
+# Envelope shape.
 for key in inputs output_schema_version output; do
   if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
     echo "validate-output: $file missing .research_step.$key" >&2
@@ -145,7 +143,7 @@ if [[ -n "$task_dir" ]]; then
   # check it has what report_example.tex has. Each failure points back to it.
   rpt="$task_dir/artifacts/report.tex"
   if [[ -f "$rpt" ]]; then
-    ref="assets/report_example.tex"
+    ref="templates/examples/report_example.tex"
     rfail() {
       echo "report-gate: $1" >&2
       echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
diff --git a/skills/research-step/templates/data_driven_theory_generation.md b/skills/research-step/templates/data_driven_theory_generation.md
index 756635c..15a5875 100644
--- a/skills/research-step/templates/data_driven_theory_generation.md
+++ b/skills/research-step/templates/data_driven_theory_generation.md
@@ -2,12 +2,12 @@
 name: data_driven_theory_generation
 description: |
   See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the best with a new experiment.
+  data, then build theories on the ones that do and test the most promising with new experiments.
 ---
 
 # Data-driven theory generation
 
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theory on what survives and run a follow-up experiment.
+Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
 
 ## Flow
 
@@ -67,12 +67,12 @@ flowchart TD
 | `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
 | `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
 | `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](../assets/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
+| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
 | `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
 | `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
 | `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
 | `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](../assets/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
+| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
 
 The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
 
diff --git a/skills/research-step/assets/report_example.tex b/skills/research-step/templates/examples/report_example.tex
similarity index 100%
rename from skills/research-step/assets/report_example.tex
rename to skills/research-step/templates/examples/report_example.tex
diff --git a/skills/research-step/assets/theorizer_mission_example.md b/skills/research-step/templates/examples/theorizer_mission_example.md
similarity index 100%
rename from skills/research-step/assets/theorizer_mission_example.md
rename to skills/research-step/templates/examples/theorizer_mission_example.md
diff --git a/skills/research-step/workflows/execute.md b/skills/research-step/workflows/execute.md
index 61bebc7..3d1a84f 100644
--- a/skills/research-step/workflows/execute.md
+++ b/skills/research-step/workflows/execute.md
@@ -9,7 +9,7 @@ Run one ready task end-to-end. Loads its schema, gathers its inputs, produces th
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that just restates a gap or finding is auto-resolved by **plan** at creation, so it won't appear here — if one does, the source was too thin for plan to fill without inventing content; flag it to the user. (Hypothesis-typed tasks that run a skill, like the theorizer and novelty scoring, do execute here.)
+1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user.
 2. **Claim it.** `bd update <id> --status=in_progress`.
 3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
 4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
diff --git a/skills/research-step/workflows/plan.md b/skills/research-step/workflows/plan.md
index e0a158d..06ae941 100644
--- a/skills/research-step/workflows/plan.md
+++ b/skills/research-step/workflows/plan.md
@@ -1,60 +1,88 @@
 # Workflow: plan
 
-Create or extend the research graph. Two modes:
+Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
 
-- **bootstrap** — no epic yet. Create the mission epic and the template's first tasks from `mission.md` (default template: `hypothesis_driven_research`).
-- **replan** — an epic exists. Add the next tasks after one closes.
+- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
+- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
 
-Always chains to **update-summary** afterward.
+Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
 
 ## Preconditions
 
-- `bd` installed and `.beads/` initialized — else run **init**.
-- **bootstrap**: `mission.md` is non-empty and `scripts/epic-root.sh` says `status: none`. If `mission.md` is missing, send the user to **brainstorm**.
-- **replan**: `scripts/epic-root.sh` says `status: found`. A source task passed in (usually by **execute**) is closed with a populated output.
+- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
+- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
+- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
 
-Each task's metadata holds its `task_type`, `inputs` (the bd ids it reads), `output_schema_version`, and `output`. The epic also carries `epic_root: true`.
+## Issue metadata convention
+
+Every task issue carries:
+
+```json
+{
+  "research_step": {
+    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
+    "inputs": ["bd-xxxx", "bd-yyyy"],
+    "output_schema_version": 1,
+    "output": null
+  }
+}
+```
+
+The mission epic additionally carries `epic_root: true`.
 
 ## Mode selection
 
-Run `scripts/epic-root.sh`. `status: none` → bootstrap. `status: found` → replan, around the closed task the caller named; if none was named, ask which closed task to build on.
+1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
+2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
 
-## Bootstrap
+## Bootstrap mode
 
-1. Read `mission.md` (abort to **brainstorm** if missing).
-2. Create the epic:
+1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
+2. **Create the epic.**
    ```
-   bd create --type=epic --title="<one line from mission.md>" --description="$(cat mission.md)"
+   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
    bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
    ```
-3. Create the template's first tasks, in order, up to its first "for each", taking each task's `type` / `inputs` / `skills` from its row. (Default template: `scope` → `definitions` → `literature_review`.)
-4. Add edges: `parent-child` to the epic, and `blocks` from each task named in another's `inputs`.
+3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
+4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
+5. **Report.** Print the epic ID and the created task IDs.
+
+## Replan mode
+
+Read the source task's task_type and output:
+
+```
+bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
+bd show <source-id> --json | jq '.[0].metadata.research_step.output'
+```
 
-## Replan
+Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
 
-The template (named in `mission.md`; default `hypothesis_driven_research`) is the plan. Find the closed task's node in it and create what comes next, taking each new task's `type` / `inputs` / `skills` from its row:
+- **Next step:** create the node(s) the diagram points to. Set `inputs` from the row, add a `blocks` edge from each input, and add a `parent-child` edge to the epic.
+- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
+- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
+- **Hypotheses** are filled and closed at creation (see below), so also create the step that follows each one in the same pass — otherwise nothing is left open for `execute` to pick up. Keep creating whatever just became unblocked until every task on the frontier needs an `execute` pass.
+- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
 
-- **Next step:** create the node(s) the diagram points to. Set inputs from the row, block on each, parent to the epic.
-- **For each:** if the closed node is the one a "for each" runs over, create one copy of the block's tasks per item.
-- **After a for-each:** create the task that follows the block only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed on creation, not executed (see below). Because they close immediately, also create the step that follows each one in the same pass — otherwise nothing is left open for **execute** to pick up. In general, keep creating whatever just came unblocked until the frontier is tasks that need an execute pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `open_questions`, ask the user before adding follow-up hypotheses. Don't add tasks the template doesn't have.
+If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
 
-### Filling in hypotheses
+### Auto-resolving hypothesis tasks
 
-A hypothesis has no separate work to execute — its source already states the claim — so fill its output and close it on creation. It still gets the same files on disk as any task (`output.json` and `output.md` under `.asta/tasks/<id>/`).
+When creating a `hypothesis` from a `literature_review` gap or an `auto_discovery` finding, its claim is already stated, so there is no separate `execute` pass. Fill and close it at creation as follows (it still produces `output.json` and `output.md` on disk like any task):
 
-1. From its source — a `literature_review` gap, or an `auto_discovery` surprising node — write `statement`, `rationale`, `falsifiable_prediction`, and `expected_evidence`.
-2. Follow the template's hypothesis row. For `data_driven_theory_generation`, the claim is the node's finding and the `rationale` cites that node by id (it's added to `inputs`) — every hypothesis traces to a specific finding.
-3. Write `output.json` and `output.md` (the readable hypothesis; link any law rather than writing a bare `node_x_y`).
-4. Check it: `scripts/validate-output.sh hypothesis <metadata-file> .asta/tasks/<id>`.
-5. Save the metadata (`scripts/write-meta.sh` + `bd update <id> --metadata @<path>`) and `bd close <id>`.
+1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
+   - `statement` — `H_n: <one-sentence claim>`
+   - `rationale` — why the source implies the claim (for a finding, cite its node id)
+   - `falsifiable_prediction` — what observation would refute it
+   - `expected_evidence` — list of concrete evidence types that would support it
+2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
+3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
 
-If a gap is too thin to fill honestly, leave the hypothesis open for a real `execute` pass instead.
+If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
 
 ## After either mode
 
-Hand off to **update-summary**.
+Hand off to **update-summary** so `summary.md` reflects the new state.
 
 ## Not here
 

From 45d82368305f55f25d7ad40c4de1073e792e7ff6 Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Thu, 11 Jun 2026 23:23:24 -0700
Subject: [PATCH 3/6] Add workflow concept to research step schemas; expand
 taxonomy

---
 .../skills/research-step/SKILL.md             |  39 +-
 .../skills/research-step/assets/schemas.yaml  | 512 ++++++++++++---
 .../research-step/scripts/close-task.sh       |  44 ++
 .../research-step/scripts/create-task.sh      |  27 +
 .../research-step/scripts/validate-output.sh  | 199 ++----
 .../research-step/scripts/write-meta.sh       |  33 -
 .../data_driven_theory_generation.md          | 118 ----
 .../templates/examples/report_example.tex     | 620 ------------------
 .../examples/theorizer_mission_example.md     |  78 ---
 .../templates/hypothesis_driven_research.md   |  50 --
 .../research-step/workflows/brainstorm.md     |  10 +-
 .../skills/research-step/workflows/execute.md |  32 +-
 .../skills/research-step/workflows/init.md    |   2 +-
 .../skills/research-step/workflows/plan.md    | 121 ++--
 .../research-step/workflows/update-summary.md |  18 +-
 plugins/asta/skills/research-step/SKILL.md    |  39 +-
 .../skills/research-step/assets/schemas.yaml  | 512 ++++++++++++---
 .../research-step/scripts/close-task.sh       |  44 ++
 .../research-step/scripts/create-task.sh      |  27 +
 .../research-step/scripts/validate-output.sh  | 199 ++----
 .../research-step/scripts/write-meta.sh       |  33 -
 .../data_driven_theory_generation.md          | 118 ----
 .../templates/examples/report_example.tex     | 620 ------------------
 .../examples/theorizer_mission_example.md     |  78 ---
 .../templates/hypothesis_driven_research.md   |  50 --
 .../research-step/workflows/brainstorm.md     |  10 +-
 .../skills/research-step/workflows/execute.md |  32 +-
 .../skills/research-step/workflows/init.md    |   2 +-
 .../skills/research-step/workflows/plan.md    | 121 ++--
 .../research-step/workflows/update-summary.md |  18 +-
 skills/research-step/SKILL.md                 |  39 +-
 skills/research-step/assets/schemas.yaml      | 512 ++++++++++++---
 skills/research-step/scripts/close-task.sh    |  44 ++
 skills/research-step/scripts/create-task.sh   |  27 +
 .../research-step/scripts/validate-output.sh  | 199 ++----
 skills/research-step/scripts/write-meta.sh    |  33 -
 .../data_driven_theory_generation.md          | 118 ----
 .../templates/examples/report_example.tex     | 620 ------------------
 .../examples/theorizer_mission_example.md     |  78 ---
 .../templates/hypothesis_driven_research.md   |  50 --
 skills/research-step/workflows/brainstorm.md  |  10 +-
 skills/research-step/workflows/execute.md     |  32 +-
 skills/research-step/workflows/init.md        |   2 +-
 skills/research-step/workflows/plan.md        | 121 ++--
 .../research-step/workflows/update-summary.md |  18 +-
 45 files changed, 1914 insertions(+), 3795 deletions(-)
 create mode 100755 plugins/asta-preview/skills/research-step/scripts/close-task.sh
 create mode 100755 plugins/asta-preview/skills/research-step/scripts/create-task.sh
 delete mode 100755 plugins/asta-preview/skills/research-step/scripts/write-meta.sh
 delete mode 100644 plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
 delete mode 100644 plugins/asta-preview/skills/research-step/templates/examples/report_example.tex
 delete mode 100644 plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md
 delete mode 100644 plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
 create mode 100755 plugins/asta/skills/research-step/scripts/close-task.sh
 create mode 100755 plugins/asta/skills/research-step/scripts/create-task.sh
 delete mode 100755 plugins/asta/skills/research-step/scripts/write-meta.sh
 delete mode 100644 plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
 delete mode 100644 plugins/asta/skills/research-step/templates/examples/report_example.tex
 delete mode 100644 plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md
 delete mode 100644 plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
 create mode 100755 skills/research-step/scripts/close-task.sh
 create mode 100755 skills/research-step/scripts/create-task.sh
 delete mode 100755 skills/research-step/scripts/write-meta.sh
 delete mode 100644 skills/research-step/templates/data_driven_theory_generation.md
 delete mode 100644 skills/research-step/templates/examples/report_example.tex
 delete mode 100644 skills/research-step/templates/examples/theorizer_mission_example.md
 delete mode 100644 skills/research-step/templates/hypothesis_driven_research.md

diff --git a/plugins/asta-preview/skills/research-step/SKILL.md b/plugins/asta-preview/skills/research-step/SKILL.md
index 3735bb5..49a7fec 100644
--- a/plugins/asta-preview/skills/research-step/SKILL.md
+++ b/plugins/asta-preview/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. Each unit of work is a typed sub-issue whose `metadata.research_step.output` matches a JSON schema in `assets/schemas.yaml`.
+Models a research session as a beads epic. A session runs a **flow**: the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`); its sub-flows `reproduction` and `theorizer`; the standalone `auto_discovery` flow (source a cohort and run a fresh discovery, typically as a separate epic kicked off after a theory-generation run); or a custom chain. Each flow's purpose is in its `mission` field in `assets/schemas.yaml`. That file defines the reusable `types`, the `tasks` (typed `input`/`output` plus a common `artifacts`), and the `flows` (each step carrying its `mission` and asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -31,41 +31,12 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
-## Plan templates
-
-A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
-
-- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
-- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
-- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
-- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that and block it on each. (`schemas.yaml` is output shape only — no wiring.)
-- Don't add tasks the template doesn't have.
-
-Available templates:
-
-| Name | Purpose |
-|---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
-| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
-
-### Task outputs
-
-Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
-
-| Path | Role |
-|---|---|
-| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
-| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
-| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
-
-Cross-task references in `output.json` use the absolute run-root-relative path; inside `output.md`, use the file-relative link form so the page renders standalone.
-
 ## Routing
 
 ### 1. Honor explicit requests
@@ -80,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta-preview/skills/research-step/assets/schemas.yaml b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
index 888db1b..b9643b3 100644
--- a/plugins/asta-preview/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
@@ -1,82 +1,436 @@
-# Output shapes for research-step tasks. Each task stores its output at
-# metadata.research_step.output, matching the shape under `output:` for its type.
-# Wiring (which task feeds which) lives in the templates, not here.
-
-schema_version: 1
-
-task_types:
-
-  scope:
-    output:
-      question: string                   # the precise research question
-      boundaries: [string]               # what is in / out of scope
-      success_criteria: [string]         # how we know we have answered it
-
-  definitions:
-    output:
-      terms:
-        - name: string
-          operational_definition: string
-          rationale: string
-
-  literature_review:
-    output:
-      summary_path: string               # relative path; long-form context
-      key_findings: [string]             # 3-10 bullets readable without opening summary_path
-      gaps: [string]                     # gaps that motivate hypotheses
-      citations:
-        - id: string
-          title: string
-          url: string
-          relevance: string
-
-  hypothesis:
-    output:
-      statement: string                  # H_n: ...
-      rationale: string
-      falsifiable_prediction: string
-      expected_evidence: [string]
-
-  experiment_design:
-    output:
-      method: string
-      procedure: [string]                # ordered steps
-      variables:
-        independent: [string]
-        dependent: [string]
-        controls: [string]
-      artifacts_expected: [string]       # paths the gathering step will produce
-
-  evidence_gathering:
-    output:
-      artifacts:
-        - path: string
-          kind: string                   # data | log | figure | code | other
-          description: string
-      log_path: string                   # what was actually run
-      deviations: [string]               # ways execution diverged from design
+version: 1
 
-  auto_discovery:
-    output:
-      runid: string                      # the AutoDS run (created or imported)
-      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
-      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
-      surprising_nodes:
-        - id: string                     # e.g. node_3_0
-          surprise: number
-          finding: string
+enums:
+  outcome:               [held, partial, failed, n/a]
+  testability:           [tested, proxy_only, untestable]
+  construct_equivalence: [equivalent, proxy, mismatch]
+  feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
+  independence_axis:     [region, instrument, method, construct, temporal, population]
+  generation_objective:  [accuracy_focused, novelty_focused]
+  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  novelty:               [established, derivable, genuinely_new]
+  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  priority:              [high, medium, low]
+  access_status:         [acquired, open_unfetched, restricted, not_found]
+  holdout_verdict:       [held, failed, untested]
+
+types:
+
+  artifact:
+    artifactId: string
+    name: string
+    description: string
+    parts: [object]
+    metadata: object
+
+  experiment:
+    experiment_id: string
+    status: string
+    hypothesis: string
+    analysis: string
+
+  empirical_law:
+    id: string
+    statement: string
+    construct: string
+    source_operationalization: string
+    source_node: string
+    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    grouping_rationale: string
+    outcome: outcome
+    testability: testability
+    independence_axes: [independence_axis]
+    effect_size_source: string
+    effect_size_reproduction: string
+    replication_path: string
+
+  dataset:
+    id: string
+    definition: string
+    source: string
+    n: number
+    sampling: string
+    variables: [string]
+    covers_laws: [string]
+
+  data_source:                       # links a run dataset to the paper and repository it came from
+    id: string
+    dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
+    paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
+    paper_title: string
+    paper_url: string
+    data_availability: string        # the paper's data-availability statement, verbatim or summarized
+    repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
+    identifier: string               # DOI / accession / direct URL for the data
+    access_status: access_status     # acquired | open_unfetched | restricted | not_found
+    local_path: string               # repo-root-relative path once acquired (else empty)
+    covers_laws: [string]
+
+  cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
+    id: string
+    research_question: string        # the intent the discovery runs against (from mission.md)
+    inclusion_criteria: string
+    exclusion_criteria: string
+    sampling: string
+    source_data_sources: [string]    # data_source ids the cohort was assembled from
+    discovery_subset: {definition: string, n: number, path: string}   # what discovery sees
+    holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
+    run_id: string                   # the stood-up auto-ds run (autodiscovery create)
+
+  reproduction_design:
+    law_id: string
+    experiment_name: string
+    plain_language_description: string
+    original_operationalization: string
+    independent_operationalization: string
+    construct_equivalence: construct_equivalence
+    feasibility: feasibility
+    required_data: string
+    data_gap: string
 
   analysis:
-    output:
-      verdict: enum [supported, refuted, inconclusive]
-      confidence: number                 # 0.0 - 1.0
-      reasoning: string
-      caveats: [string]
-
-  synthesis:
-    output:
-      answer: string                     # answer to scope.question
-      supporting_hypotheses: [bd_id]
-      refuted_hypotheses: [bd_id]
-      open_questions: [string]           # become discovered-from edges on re-plan
-      report_path: string                # generated markdown report
+    final_answer: string
+    assumptions: [string]
+    figures: [{caption: string, image: string}]
+    code: string
+
+  audit_report:
+    subject_id: string
+    analysis_id: string
+    challenges: [{concern: string, check: string, outcome: string}]
+    artifacts_found: [string]
+    verdict_survives: boolean
+    recommended_adjustment: string
+
+  extracted_data:
+    id: string
+    run_id: string
+    paper_id: string
+    extraction_schema_id: string
+    rows:
+      - name_short: string
+        name_full: string
+        brief_description: string
+        citation_title: string
+        uuid: string
+
+  theory:
+    id: string
+    name: string
+    description: string
+    theory_query: string
+    objective: generation_objective
+    grounds_law_ids: [string]
+    supporting_evidence_ids: [string]
+    components:
+      theory_statements:
+        - statement_name: string
+          theory_statement: string
+          supporting_evidence: [{text: string, uuids: [string]}]
+          conflicting_evidence: [{text: string, uuids: [string]}]
+      new_predictions_likely: [string]
+      new_predictions_unknown: [string]
+      unaccounted_for: [{text: string, uuids: [string]}]
+
+  testability_triage:
+    assessments:
+      - theory_id: string
+        testable_now: boolean
+        available_data: string
+        required_data: string
+        proposed_test: string
+        gap: string
+    testable_theory_ids: [string]
+
+  theory_evaluation:
+    id: string
+    theory_id: string
+    novelty: novelty
+    overall_support_or_contradict: string
+    overall_support_or_contradict_explanation: string
+
+  verification:
+    theory_id: string
+    prediction: string
+    verdict: verification_verdict
+    effect_size: string
+    data_used: string
+    audit_survived: boolean
+    analysis_id: string
+
+  next_run_proposal:
+    kind: next_step_kind
+    title: string
+    tests: [string]
+    data_needed: string
+    expected_signature: string
+    priority: priority
+
+  # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
+  # theory_report, verification_report), one standalone data-gaps report, and a
+  # theory-led master (research_report). Each carries report_path (the .md deliverable
+  # written first), a title, a one-line headline, and a typed body. All but research_report
+  # carry `links` back to the artifacts, tasks, and papers they rest on (research_report lists
+  # its sub_reports instead). Each sub-flow report exposes a `gaps` list that gap_synthesis aggregates.
+
+  provenance_report:
+    report_path: string
+    title: string
+    headline: string
+    sources:
+      - dataset_id: string
+        paper_title: string
+        paper_url: string
+        repository: string
+        access_status: access_status
+        local_path: string
+    acquired: [string]
+    not_acquired: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  reproduction_report:
+    report_path: string
+    title: string
+    headline: string
+    method_note: string
+    laws_ledger:
+      - law_id: string
+        statement: string
+        outcome: outcome
+        testability: testability
+        effect_size_source: string
+        effect_size_reproduction: string
+        independence_axes: [independence_axis]
+        evidence: string
+    what_held: [string]
+    what_failed_or_untestable: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  theory_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theories:
+      - theory_id: string
+        name: string
+        objective: generation_objective
+        one_line: string
+        grounds_law_ids: [string]
+        novelty: novelty
+        testable_now: boolean
+        supporting_evidence_ids: [string]
+    novelty_summary: string
+    new_predictions: [string]
+    open_threads: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  verification_report:
+    report_path: string
+    title: string
+    headline: string
+    novelty_by_verification:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verdict: verification_verdict
+        effect_size: string
+        data_used: string
+        audit_survived: boolean
+    what_was_tested: string
+    what_could_not_be_tested: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  data_gaps_report:
+    report_path: string
+    title: string
+    headline: string
+    gaps:
+      - item: string
+        missing_data: string
+        blocks: string
+        severity: priority
+        arose_in: string
+    next_steps: [next_run_proposal]
+    links: [{label: string, ref: string}]
+
+  research_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theory_highlights:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verification: verification_verdict
+    inference_chain: [{claim: string, chain: [string]}]
+    what_was_done: [string]
+    sub_reports: [{kind: string, report_path: string, one_line: string}]
+    tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+
+  discovery_report:                  # synthesis output of the auto_discovery flow
+    report_path: string
+    title: string
+    headline: string
+    laws:
+      - law_id: string
+        statement: string
+        surprise: number             # the discovery run's surprise signal for this candidate law
+        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
+        effect_size: string
+    next_steps: [next_run_proposal]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+tasks:
+  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
+  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
+  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
+  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
+  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
+  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
+  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
+  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
+  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
+  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
+  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
+  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
+  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
+  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
+  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
+  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
+  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
+  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
+  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
+  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
+  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
+  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
+  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
+  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
+  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
+  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+
+flows:
+
+  data_and_literature_grounded_theory_generation:
+    mission: Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.
+    data_provenance:
+      mission: Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.
+      chain:
+        - {workflow: data_provenance, mission: Source the papers and datasets the run named in the mission was built on; acquire the open data and record what is restricted.}
+    reproduction:
+      mission: Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.
+      chain:
+        - {workflow: reproduction, mission: Import the run named in the mission; reproduce each law on independent data with construct-equivalence and a feasibility gate.}
+    theorizer:
+      mission: Generate literature- and data-grounded theories of the reproduced laws and score their novelty.
+      chain:
+        - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on the data in hand; score novelty on the testable subset.}
+    verification:
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      analysis:
+        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_verification:
+        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+        chain: []
+    verification_synthesis:
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      chain: []
+    gap_synthesis:
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      chain: []
+    final_synthesis:
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      chain: []
+
+  data_provenance:
+    mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
+    provenance_search:
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      chain: [asta literature find, asta papers search]
+    provenance_extraction:
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    data_acquisition:
+      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      chain: [asta documents, asta autodiscovery upload]
+    provenance_synthesis:
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      chain: []
+
+  reproduction:
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held, partial, failed, or n/a) crossed with testability (tested, proxy_only, or untestable) - and comes from the branch analysis, not the ingested run.
+    data_driven_discovery:
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      chain: [asta autodiscovery run, asta autodiscovery experiments]
+    law_extraction:
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      chain: []
+    evidence_gathering:
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
+    replication:
+      mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
+      reproduction_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+        chain: [asta experiment]
+      analysis:
+        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduction_audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduce:
+        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+        chain: []
+    reproduction_synthesis:
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      chain: []
+
+  theorizer:
+    mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
+    evidence_extraction:
+      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    theory_generation:
+      mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
+      theory_formation:
+        mission: Form theories from the shared extraction store under this branch's objective.
+        chain: [asta generate-theories form-theory]
+    testability_triage:
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      chain: []
+    novelty_assessment:
+      mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      chain: [asta generate-theories evaluate-novelty]
+    theory_synthesis:
+      mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      chain: []
+
+  auto_discovery:
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    cohort_assembly:
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
+    discovery_run:
+      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      chain: [asta autodiscovery submit, asta autodiscovery experiments]
+    replication:
+      mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      holdout_replication:
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+    discovery_synthesis:
+      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      chain: []
diff --git a/plugins/asta-preview/skills/research-step/scripts/close-task.sh b/plugins/asta-preview/skills/research-step/scripts/close-task.sh
new file mode 100755
index 0000000..673b23f
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/scripts/close-task.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# close-task.sh <issue-id> <output-json> <output-markdown>
+# Publish a task's output and finish it: write output_json + output_markdown into the issue
+# metadata, validate output_json against the schema, close the issue, assert it closed, then
+# close any ancestor group whose last child just closed.
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Exit: 1 usage or unreadable/invalid input file · 2 issue did not close. A validate-output.sh failure exits with its code.
+[[ $# -eq 3 ]] || { echo "usage: close-task.sh <issue-id> <output-json> <output-markdown>" >&2; exit 1; }
+id="$1"; oj="$2"; om="$3"
+[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; }
+[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; }
+jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; }
+
+# 1. publish: merge output_json + output_markdown into the existing research_step metadata
+cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
+merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
+  '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT; printf '%s' "$merged" > "$tmp"  # removed on any exit
+bd update "$id" --metadata @"$tmp" >/dev/null
+
+# 2. validate structurally (reads the issue back; no style lint)
+bash "$here/validate-output.sh" "$id"
+
+# 3. close and 4. assert closure
+bd close "$id" >/dev/null
+[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \
+  || { echo "close-task: $id did not close" >&2; exit 2; }
+echo "closed $id"
+# Ids are dot-separated, so stripping the last ".segment" of an id yields its parent's id.
+# 5. cascade: close each ancestor group whose direct children are all closed
+cur_id="$id"
+while [[ "$cur_id" == *.* ]]; do
+  parent="${cur_id%.*}"
+  bd show "$parent" --json >/dev/null 2>&1 || break
+  open_kids="$(bd list --json | jq --arg p "$parent" '
+    [ .[]
+      | select(.id | startswith($p + "."))
+      | select((.id[($p|length)+1:] | contains(".")) | not)
+      | select(.status != "closed") ] | length')"
+  [[ "$open_kids" -eq 0 ]] || break
+  bd close "$parent" >/dev/null 2>&1 && echo "closed group $parent"
+  cur_id="$parent"
+done
diff --git a/plugins/asta-preview/skills/research-step/scripts/create-task.sh b/plugins/asta-preview/skills/research-step/scripts/create-task.sh
new file mode 100755
index 0000000..6024cf6
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/scripts/create-task.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# create-task.sh <parent-id> <task_type> <flow> <title> <brief-description> [input-id ...]
+# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description,
+# and initialized research_step metadata. output_json / output_markdown stay null until
+# execute publishes them via close-task.sh. Prints the new issue id.
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+# Exit: 1 usage · 3 task_type not accepted by the schemas.yaml check · 4 description empty, multi-line, or > 200 chars
+[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
+parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
+# After the shift, "$@" holds only the optional input ids, so refer to the named variables below (never $2).
+python3 - "$schemas" "$task_type" <<'PY' || { echo "create-task: unknown task_type '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+import yaml, sys
+d = yaml.safe_load(open(sys.argv[1]))
+sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
+PY
+# The description is stored as the issue's brief: it must be one non-empty line of at most 200 characters.
+[[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
+[[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
+[[ "${#desc}" -le 200 ]]    || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; }
+# Build the inputs array ([] when no input ids were given) and the initial metadata envelope.
+if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
+meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT; printf '%s' "$meta" > "$tmp"  # removed on any exit
+bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
index ab46d65..af3b8f6 100755
--- a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
@@ -1,166 +1,43 @@
 #!/usr/bin/env bash
-# validate-output.sh — structural validation of a research_step output JSON.
-#
-# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
-#
-# Verifies that the JSON file:
-#   1. parses
-#   2. carries the metadata envelope
-#      ({research_step: {task_type, inputs, output_schema_version, output}})
-#   3. has every required `output.<key>` for the given <task_type> per
-#      assets/schemas.yaml (schema_version: 1)
-# If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md.
-#
-# Exit codes:
-#   0  — valid
-#   2  — JSON parse error
-#   3  — unknown task_type
-#   4  — missing required field
-#   5  — task_type mismatch with envelope
-#   6  — required output.md missing (only when [task-dir] supplied)
-#   7  — output.md empty or a stub (only when [task-dir] supplied)
-#   8  — output.md has no markdown links (only when [task-dir] supplied)
-#   9  — a named entity is unlinked (only when [task-dir] supplied)
-#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
-#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
-#           <3 embedded figures (14), a required section is missing (15)
-#
-# Structural checks only — required fields, working links, and the report's basic pieces.
+# validate-output.sh <issue-id> — structural check of a task's stored output_json.
+# Reads the issue from beads, compiles assets/schemas.yaml, and checks that
+# metadata.research_step.output_json holds exactly tasks.<task_type>.output (incl. artifacts).
+# No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
 set -euo pipefail
-
-if [[ $# -lt 2 || $# -gt 3 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
-  exit 1
-fi
-
-task_type="$1"
-file="$2"
-task_dir="${3:-}"
-
-if ! jq -e . "$file" > /dev/null 2>&1; then
-  echo "validate-output: $file is not valid JSON" >&2
-  exit 2
-fi
-
-# Required output fields, mirroring assets/schemas.yaml (schema_version: 1).
-case "$task_type" in
-  scope)              required="question boundaries success_criteria" ;;
-  definitions)        required="terms" ;;
-  literature_review)  required="summary_path key_findings gaps citations" ;;
-  hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
-  experiment_design)  required="method procedure variables artifacts_expected" ;;
-  evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
-  analysis)           required="verdict confidence reasoning caveats" ;;
-  synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
-  *)
-    echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
-    exit 3
-    ;;
-esac
-
-# The envelope must carry the matching task_type so we don't validate scope JSON
-# against an analysis schema by accident.
-envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
-if [[ -z "$envelope_type" ]]; then
-  echo "validate-output: $file missing .research_step.task_type" >&2
-  exit 5
-fi
-if [[ "$envelope_type" != "$task_type" ]]; then
-  echo "validate-output: envelope task_type='$envelope_type' but expected '$task_type'" >&2
-  exit 5
-fi
-
-# Envelope shape.
-for key in inputs output_schema_version output; do
-  if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: $file missing .research_step.$key" >&2
-    exit 5
-  fi
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
+id="$1"
+# `|| true`: with pipefail + errexit a failed `bd show` would abort right here, before the exit-2 message below.
+rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // empty' || true)"
+[[ -n "$rs" ]] || { echo "validate-output: $id has no metadata.research_step" >&2; exit 2; }
+task_type="$(jq -r '.task_type // empty' <<<"$rs")"
+[[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
+# expected = the space-separated keys of tasks.<task_type>.output in schemas.yaml.
+expected="$(python3 - "$schemas" "$task_type" <<'PY'
+import yaml, sys
+d = yaml.safe_load(open(sys.argv[1]))
+t = d["tasks"].get(sys.argv[2])
+if t is None: sys.exit(3)
+print(" ".join(t["output"]))
+PY
+)" || { echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+
+got="$(jq -c '.output_json // empty' <<<"$rs")"
+[[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
+# Every key the schema declares must be present in the stored output_json.
+for k in $expected; do
+  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
+    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
 done
-
-# Required output fields.
-for key in $required; do
-  if ! jq -e ".research_step.output | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: missing required field 'output.$key' for task_type '$task_type'" >&2
-    exit 4
-  fi
-done
-
-# Type spot-checks for the high-leverage cases. Not exhaustive — just the
-# fields where a wrong type at this layer would silently break update-summary rendering
-# or downstream tasks.
-case "$task_type" in
-  literature_review)
-    jq -e '.research_step.output.key_findings | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.key_findings must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.gaps | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.gaps must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.citations | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.citations must be an array" >&2; exit 4; }
-    ;;
-  analysis)
-    jq -e '.research_step.output.verdict | IN("supported", "refuted", "inconclusive")' "$file" >/dev/null \
-      || { echo "validate-output: output.verdict must be one of supported|refuted|inconclusive" >&2; exit 4; }
-    jq -e '.research_step.output.confidence | type == "number" and . >= 0 and . <= 1' "$file" >/dev/null \
-      || { echo "validate-output: output.confidence must be a number in [0, 1]" >&2; exit 4; }
-    ;;
-esac
-
-# output.md document-quality gate. Every task must produce a human-readable
-# output.md (skill "Task outputs" table) that links the entities it names.
-if [[ -n "$task_dir" ]]; then
-  md="$task_dir/output.md"
-  if [[ ! -f "$md" ]]; then
-    echo "validate-output: required output.md not found at '$md'" >&2
-    exit 6
-  fi
-  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
-    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
-    exit 7
-  fi
-  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
-    echo "validate-output: output.md has no markdown links" >&2
-    exit 8
-  fi
-  # Strip links, then flag any named entity still bare in output.md / report.tex.
-  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
-    [[ -f "$f" ]] && perl -ne '
-      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
-      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
-      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
-    ' "$f"
-  done) || true
-  if [[ -n "$unlinked" ]]; then
-    echo "$unlinked" >&2
-    echo "validate-output: named entities above are unlinked" >&2
-    exit 9
-  fi
-
-  # The report's basics. Only the report node makes report.tex; when it exists,
-  # check it has what report_example.tex has. Each failure points back to it.
-  rpt="$task_dir/artifacts/report.tex"
-  if [[ -f "$rpt" ]]; then
-    ref="templates/examples/report_example.tex"
-    rfail() {
-      echo "report-gate: $1" >&2
-      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
-      echo "     its depth and citation density before retrying." >&2
-      exit "$2"
-    }
-    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
-    grep -q '\\begin{tikzpicture}\|\\includegraphics' \
-      <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
-      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
-    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
-    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
-    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
-    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
-      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
-    done
-  fi
-fi
+while IFS= read -r k; do  # reject any output_json key that is not in the space-separated $expected list
+  case " $expected " in *" $k "*) ;; *)
+    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
+  esac
+done < <(jq -r 'keys[]' <<<"$got")  # feeds the loop the top-level keys of output_json
+jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
+  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }  # the only type check done here
 
 echo "ok"
diff --git a/plugins/asta-preview/skills/research-step/scripts/write-meta.sh b/plugins/asta-preview/skills/research-step/scripts/write-meta.sh
deleted file mode 100755
index 6e7d71a..0000000
--- a/plugins/asta-preview/skills/research-step/scripts/write-meta.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-# write-meta.sh — materialize a metadata JSON blob to a temp file and print
-# its path, suitable for `bd update <id> --metadata @<path>` or
-# `bd create ... --metadata=@<path>`.
-#
-# Reads JSON from stdin (or from $1 if a path is given), validates that it
-# parses, and writes it under $TMPDIR with mode 0600. The path is printed on
-# stdout so the caller can splice it into a bd command.
-#
-# Why this exists: `bd update --metadata` accepts either a JSON string or
-# `@file.json`. Inlining a JSON string requires `"$(cat /tmp/x.json)"` (a
-# non-bd shell op the SKILL.md frontmatter does not permit), and shell quoting
-# gets fragile with embedded quotes. Materializing a file once and using
-# `@path` keeps everything in `Bash(bd:*)` territory.
-set -euo pipefail
-
-tmp=$(mktemp -t research-step-meta.XXXXXX.json)
-trap 'rm -f "$tmp"' ERR
-
-if [[ $# -ge 1 ]]; then
-  cp "$1" "$tmp"
-else
-  cat > "$tmp"
-fi
-
-if ! jq -e . "$tmp" >/dev/null 2>&1; then
-  echo "write-meta: input is not valid JSON" >&2
-  rm -f "$tmp"
-  exit 2
-fi
-
-chmod 0600 "$tmp"
-echo "$tmp"
diff --git a/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
deleted file mode 100644
index 15a5875..0000000
--- a/plugins/asta-preview/skills/research-step/templates/data_driven_theory_generation.md
+++ /dev/null
@@ -1,118 +0,0 @@
----
-name: data_driven_theory_generation
-description: |
-  See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the most promising with new experiments.
----
-
-# Data-driven theory generation
-
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  data_provenance["Data provenance"]
-  definitions --> data_provenance
-  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
-  data_provenance --> auto_discovery
-  subgraph sub1["for each of the 10 surprising findings"]
-    direction TB
-    hypothesis["Restate finding"]
-    literature_review["Literature search"]
-    experiment_design["Pre-register test"]
-    evidence_gathering["Find independent data"]
-    analysis["Replicate"]
-    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
-    analysis -- "retry: inconclusive → re-spec" --> experiment_design
-    analysis -- "retry: bad data → re-locate" --> evidence_gathering
-  end
-  auto_discovery --> hypothesis
-  replication_synthesis["Replication summary (k of 10, by mechanism)"]
-  analysis --> replication_synthesis
-  theorizer_theories["Theorizer-grounded theories"]
-  replication_synthesis --> theorizer_theories
-  novelty["Score theories for novelty"]
-  theorizer_theories --> novelty
-  subgraph sub2["for each of the top 3 theories"]
-    direction TB
-    followon_exp_design["Pre-register experiment (AED)"]
-    followon_evidence["Find new data"]
-    followon_analysis["Run, or leave as a proposal"]
-    followon_exp_design --> followon_evidence --> followon_analysis
-  end
-  novelty --> followon_exp_design
-  report["Closing report"]
-  followon_analysis --> report
-  report --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
-| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
-| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
-| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
-| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
-| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
-| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
-| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
-| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
-| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
-| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
-
-The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
-
-## Running DataVoyager
-
-Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only call data `data_unavailable` once the ledger shows the trace failed — a 403/404 on someone else's resource isn't proof — then leave that `analysis` blocked, not closed.
-
-A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim:
-
-| What DataVoyager did | Go back to | Fix |
-|---|---|---|
-| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
-| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
-| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
-
-## mission.md
-
-- `run_pointer:` — the AutoDS run to import (omit to create one).
-- `datasets[]` — input dataset URIs for a new run.
-- A focus statement in the body — the question under study.
-
-Unless the user explicitly says to use local inputs only, fetch external public data for replication.
-
-## Writing the report and outputs
-
-These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
-
-- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
-- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
-- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text or `node_3_0`:
-
-  | thing | link to |
-  |---|---|
-  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
-  | paper | the asta document, paper URL, or `data_provenance` entry |
-  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
-  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
-  | dataset | the file under `inputs/`, or the Datasets appendix |
-  | experiment E-number | its appendix entry |
-
-- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
-- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
-- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/plugins/asta-preview/skills/research-step/templates/examples/report_example.tex b/plugins/asta-preview/skills/research-step/templates/examples/report_example.tex
deleted file mode 100644
index e87ebf5..0000000
--- a/plugins/asta-preview/skills/research-step/templates/examples/report_example.tex
+++ /dev/null
@@ -1,620 +0,0 @@
-% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
-%
-% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
-% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
-%
-% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
-% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
-% your run embeds its own figures from `artifacts/`.
-
-\documentclass[11pt]{article}
-\usepackage[margin=1in]{geometry}
-\usepackage{amsmath}
-\usepackage{amssymb}
-\usepackage{hyperref}
-\usepackage{booktabs}
-\usepackage{longtable}
-\usepackage{array}
-\usepackage{enumitem}
-\usepackage{xcolor}
-\usepackage{microtype}
-\usepackage{graphicx}
-\usepackage{titling}
-\usepackage{fancyhdr}
-\usepackage{titlesec}
-\usepackage{tabularx}
-\usepackage{tikz}
-\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
-\definecolor{paperfinderpurple}{HTML}{6D28D9}
-
-\hypersetup{
-  colorlinks=true,
-  linkcolor=blue!55!black,
-  urlcolor=blue!55!black,
-  citecolor=blue!55!black,
-}
-
-\pagestyle{fancy}
-\fancyhf{}
-\fancyhead[L]{Multi-Agent Computational Investigation}
-\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
-\fancyfoot[C]{\thepage}
-\renewcommand{\headrulewidth}{0.4pt}
-
-\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
-\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
-
-\setlength{\parskip}{0.5em}
-
-\begin{document}
-
-\begin{titlepage}
-\thispagestyle{empty}
-\vspace*{0.6in}
-\begin{center}
-{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
-\end{center}
-
-\vspace*{40pt}
-
-\noindent\makebox[\textwidth][c]{%
-\begin{tikzpicture}[
-  font=\footnotesize,
-  procbox/.style={
-    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  agentbox/.style={
-    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  paperbox/.style={
-    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  finalbox/.style={
-    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
-  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
-  node distance=0.45cm and 0.45cm,
-]
-% Band 1: discovery phase, left-to-right
-\node[procbox] (scope) {Scope \&\\Definitions};
-\node[procbox, right=of scope] (prov) {Data\\Provenance};
-\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
-\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
-\node[procbox, right=of laws] (themes) {Cluster\\Themes};
-
-% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
-\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
-\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
-\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
-\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
-\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
-
-\begin{scope}[on background layer]
-\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
-\end{scope}
-\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
-
-% Band 3: synthesis + follow-on, left-to-right
-\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
-\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
-\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
-\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
-\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
-
-\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
-
-% Band 1 arrows
-\draw[arr] (scope) -- (prov);
-\draw[arr] (prov) -- (ad);
-\draw[arr] (ad) -- (laws);
-\draw[arr] (laws) -- (themes);
-
-% Band 1 -> Band 2: straight down themes -> lit
-\draw[arr] (themes) -- (lit);
-
-% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
-\draw[arr] (lit) -- (hyp);
-\draw[arr] (hyp) -- (evid);
-\draw[arr] (evid) -- (exp);
-\draw[arr] (exp) -- (rep);
-
-% Retry self-loop on rep (black so it reads clearly)
-\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
-
-% Band 2 -> Band 3: straight down rep -> across
-\draw[arr] (rep) -- (across);
-
-% Band 3 arrows
-\draw[arr] (across) -- (theo);
-\draw[arr] (theo) -- (nov);
-\draw[arr] (nov) -- (aed);
-\draw[arr] (aed) -- (dv2);
-
-% Band 3 -> final report
-\draw[arr] (dv2) -- (rep_final);
-
-% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
-\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
-\end{tikzpicture}%
-}
-
-\vspace*{\fill}
-\begin{center}
-\footnotesize\itshape\color{gray!50!black}
-Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
-\end{center}
-\end{titlepage}
-
-\tableofcontents
-\newpage
-
-%---------------------------------------------------------------
-\section{Mission}
-
-This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
-
-We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
-
-The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
-
-%---------------------------------------------------------------
-\section{Abstract}
-
-We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
-
-\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
-\begin{itemize}[noitemsep]
-\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
-\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
-\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
-\end{itemize}
-
-\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
-
-\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
-
-%---------------------------------------------------------------
-\section{Background and Motivation}
-
-\subsection{The Pakistan WPV1 resurgence}
-
-Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
-\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
-\label{fig:national}
-\end{figure}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
-\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
-\label{fig:district}
-\end{figure}
-
-\subsection{The older-cohort hypothesis}
-
-Three lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
-
-\subsection{Prior AutoDiscovery findings}
-
-Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) were curated into six candidate cross-cutting laws:
-\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
-\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
-\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
-\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
-\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
-\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
-\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
-\end{description}
-
-These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
-
-%---------------------------------------------------------------
-\section{Methods}
-\label{sec:methods}
-
-\subsection{Data sources}
-
-The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
-
-\begin{itemize}[noitemsep]
-\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
-\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
-\item Pakistan Standards of Living Measurement Survey (PSLM) 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
-\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
-\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
-\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
-\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
-\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
-\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
-\end{itemize}
-
-\subsection{Computational agents and their roles}
-
-\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
-
-\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
-
-\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. It includes a calibrated novelty assessment that produces law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
-
-\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
-
-\subsection{AutoDiscovery curation and replication design}
-\label{sec:methods_ad}
-
-The four AD runs together completed 121 successful experiments. We curated cross-cutting findings at $|$surprisal$|$ $\geq 0.27$ or the system's intrinsic surprise flag set to true, grouping into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal magnitude of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
-
-For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
-
-\subsection{Cross-source robustness experiments}
-
-Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
-
-\subsection{Theorizer runs}
-
-Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
-
-\subsection{AutoExperimentDesigner follow-on protocols}
-
-After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
-
-For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
-
-\subsection{Statistical procedures}
-
-\begin{itemize}[noitemsep]
-\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
-\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
-\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
-\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
-\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Results}
-
-The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below, organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
-\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
-\label{fig:matrix}
-\end{figure}
-
-\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
-
-The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
-
-The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
-E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
-E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L1.}
-\end{table}
-
-\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
-
-\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
-
-L2 was tested in three independent ways and was refuted in all three.
-
-In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
-
-Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
-\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} Districts with at least one case of either WPV1 or cVDPV2. Marker size encodes total case volume; color encodes the same. The downward OLS slope indicates cVDPV2 (low ratio) concentrates in adult-heavy districts.}
-\label{fig:subtype}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
-E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
-E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
-E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L2.}
-\end{table}
-
-\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
-
-\subsection{Law L3 --- Two-regime household contact intensity}
-
-L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient is significant at $p{=}0.0006$ with the sign indicating the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produces $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis are recovered.
-
-\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
-
-\subsection{Law L4 --- Cross-border mobility mechanism}
-
-L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is supported by two independent experiments (E11 and E13) and delimited by a third (E12), which rules out the static refugee-stock channel:
-
-\begin{itemize}[noitemsep]
-\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
-\item Experiment E13 fit a district-year Poisson with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant at coefficient 1.75, $p{=}0.079$.
-\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
-\end{itemize}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
-\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan after which cross-border coupling intensified per Experiment E11.}
-\label{fig:pakafg}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
-E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
-E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
-E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L4.}
-\end{table}
-
-\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
-
-\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
-
-L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model was not significant ($p{=}0.383$).
-
-The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
-
-\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
-
-%---------------------------------------------------------------
-\section{Pre-Registered Confirmatory Test of the Combined Theory}
-\label{sec:final}
-
-\subsection{Background and rationale}
-
-The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
-
-This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
-
-\subsection{AutoExperimentDesigner pre-registered protocol}
-
-The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
-
-\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the first year in which Pol3 crosses 80\%.
-
-\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
-
-\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
-
-\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
-
-\subsection{DataVoyager execution}
-
-The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
-
-A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
-
-\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
-
-The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
-
-\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
-
-In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The ratio $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
-
-The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at $c{=}85$\%. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
-
-\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
-
-At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
-
-\subsection{Combined verdict}
-
-All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Component & Key statistic & Status \\
-\midrule
-P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
-P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
-P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
-Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
-\bottomrule
-\end{tabular}
-\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
-\end{table}
-
-%---------------------------------------------------------------
-\section{Trustworthiness Analysis}
-
-\subsection{What we can trust}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
-\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
-\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
-\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
-\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
-\end{itemize}
-
-\subsection{Key limitations}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
-\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
-\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
-\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
-\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
-\item \textbf{Theorizer corpus selection.} The PaperFinder corpus heavily indexed on programmatic reports and Pakistan-Afghanistan phylogenetic studies. The foundational immunological mucosal-immunity papers were less weighted. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
-\end{itemize}
-
-\subsection{Deviations from protocol}
-
-\begin{itemize}[noitemsep]
-\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
-\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Conclusions}
-
-\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
-
-\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
-
-\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
-
-\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is approximately 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
-
-\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
-
-%---------------------------------------------------------------
-\section{Future Directions}
-
-\begin{enumerate}[noitemsep]
-\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
-\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
-\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
-\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
-\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
-\end{enumerate}
-
-%---------------------------------------------------------------
-\appendix
-
-\section{Computational Experiment Catalogue}
-\label{app:experiments}
-
-This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
-
-\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
-
-\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
-
-\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
-
-\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level ($n{=}5$), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
-
-\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
-
-\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15-49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was statistically insignificant. Verdict: inconclusive (underpowered).
-
-\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
-
-\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log\bigl[(\text{ES}+0.5)/(\text{cases}+0.5)\bigr]$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
-
-\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
-
-\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
-
-\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
-
-\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
-
-\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
-
-\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
-
-\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
-
-\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
-
-\section{Datasets}
-\label{app:datasets}
-
-\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
-\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
-
-\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
-
-\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
-
-\item[D4.] \textbf{Pakistan Standards of Living Measurement Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
-
-\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
-
-\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
-
-\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
-
-\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
-
-\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
-
-\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
-\end{description}
-
-\section{References}
-
-The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
-
-\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
-\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
-
-\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
-
-\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
-
-\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
-
-\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
-
-\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
-
-\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
-
-\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
-
-\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
-
-\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
-
-\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
-
-\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
-
-\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
-
-\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
-
-\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
-
-\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
-
-\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
-\end{description}
-
-\end{document}
diff --git a/plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md b/plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md
deleted file mode 100644
index acaa800..0000000
--- a/plugins/asta-preview/skills/research-step/templates/examples/theorizer_mission_example.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# Example theorizer mission statement
-
-This is a worked example of the **mission statement** passed to the theorizer in the
-`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
-run's `mission.md`; it is the prompt the theorizer receives once the per-theme
-reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
-and the per-theme findings.
-
-A well-formed theorizer mission does five things, and this example shows all five:
-
-1. **States the question** in one sentence, naming the phenomenon and the population of interest.
-2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
-3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
-4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
-5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
-
-Tagging each finding/question/constraint with its supporting experiment is what keeps
-the returned theories anchorable: downstream, `theorizer_theories` drops any theory
-without ≥1 law anchor, and this structure makes the anchor explicit.
-
----
-
-```
-Mission: Generate theories that explain the role of populations aged 5+ years in
-Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
-findings and the open questions they leave unresolved.
-
-SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
-  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
-      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
-  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
-      strengthening significantly after 2021 (X2).
-  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
-      by under-5 population share, with under-5 share dominating 15-64 working-age
-      share (T2 retry-1).
-  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
-      dominates in adult-heavy districts (X4, p<0.001).
-  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
-      scale (T5 retry-0/1).
-  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
-      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
-      (X7).
-
-OPEN QUESTIONS (theories should address at least one):
-  Q1. What replaced national Pol3 coverage as the dominant transmission lever
-      after 2018-2019?
-  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
-      drives the case coupling intensification?
-  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
-      in young districts) appear?
-  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
-      they are NOT the dominant district-level predictor but ARE plausibly the
-      operative mobility vectors?
-
-CONSTRAINTS (refuted framings to avoid):
-  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
-      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
-  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
-      refuted at district by T2, at province by T2 retry-4, on silent-transmission
-      signature by X3, and on subtype contrast by X4.
-  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
-      by T5 retry-0/1.
-  C4. Theories centered on resident Afghan refugee populations as a static mobility
-      channel — refuted by X7.
-
-REWARDED FRAMINGS:
-  R1. Theories that explain the 2018-2019 break date in terms of immunological,
-      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
-  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
-      deportations, seasonal transit) consistent with the post-2021 intensification.
-  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
-      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
-      while WPV1 retains a pediatric profile.
-  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
-      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
-  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
-      (mobility) — the two laws DV reproduced.
-```
diff --git a/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md b/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
deleted file mode 100644
index eb3c847..0000000
--- a/plugins/asta-preview/skills/research-step/templates/hypothesis_driven_research.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-name: hypothesis_driven_research
-description: |
-  Literature-grounded hypothesis generation. Survey the literature, raise a
-  hypothesis per gap, test each, and write a closing report.
----
-
-# Hypothesis-driven research
-
-Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  lit_review["Literature review"]
-  definitions --> lit_review
-  subgraph sub1["for each gap"]
-    direction TB
-    hypothesis["Hypothesis"]
-    experiment_design["Experiment design"]
-    evidence_gathering["Evidence gathering"]
-    analysis["Analysis"]
-    hypothesis --> experiment_design --> evidence_gathering --> analysis
-  end
-  lit_review --> hypothesis
-  closing["Closing synthesis"]
-  analysis --> closing
-  closing --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | One line: the question under study. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
-| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]` — one hypothesis per gap. | `asta-preview:find-literature` |
-| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
-| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Get the verdict from DataVoyager (`asta analyze-data submit`), framed on the hypothesis with the gathered data. It must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
-| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
-
-The `hypothesis` tasks are filled and closed at creation from the literature gaps — see plan.md.
diff --git a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
index 884f48f..250ba36 100644
--- a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
+++ b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
@@ -25,7 +25,7 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one. Record the chosen flow(s) in `mission.md` so `plan` can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
@@ -41,10 +41,10 @@ Pick the branch that matches; do not run more than one.
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
-| Single issue's full `metadata.research_step.output` | `bd show <id> --json`                                                                                  |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list`                                                                                              |
-| Dependency structure | `bd dep tree <epic-id> --direction up`|
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output.summary_path` referenced from the digest                         |
+| Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
+| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
+| Task tree | `bd list --json` — ids encode the parent-child outline |
+| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
 | Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
diff --git a/plugins/asta-preview/skills/research-step/workflows/execute.md b/plugins/asta-preview/skills/research-step/workflows/execute.md
index 3d1a84f..a8596e2 100644
--- a/plugins/asta-preview/skills/research-step/workflows/execute.md
+++ b/plugins/asta-preview/skills/research-step/workflows/execute.md
@@ -1,35 +1,33 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
 
 ## Preconditions
 
 - An epic root exists (`scripts/epic-root.sh` prints `status: found`).
-- `bd ready --json` is non-empty, **or** the caller supplied a specific task ID that is currently `open` and unblocked.
+- An open issue with a `task_type` exists, **or** the caller supplied a specific `open` task ID.
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user.
+1. **Pick a task.** If a task ID was supplied, use it. Else pick the **open issue that has a `task_type` and the smallest hierarchical id** — `bd list --json`, keep `status == open` with `metadata.research_step.task_type != null`, sort by id, take the first. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
 2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
+3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
+4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
+7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
 
-   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
-6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so the `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the wrapper and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
+## Notes on output
 
-## Notes on output files
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/`, where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, and are referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`; repo files such as the auto-ds inputs are referenced as `inputs/<path>`).
 
-Schema fields ending in `_path` are relative paths. Conventions:
+Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `summary_path` (from `literature_review`) → `background_knowledge.txt` by convention, but any path works.
-- `log_path` (from `evidence_gathering`) → typically under `logs/`.
-- `report_path` (from `synthesis`) → typically `report.md`.
+- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside the master report (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
 
-Write the file before setting the output JSON. If the executor crashes between writing the file and closing the issue, the file is harmless orphan data — re-running `execute` on the same issue will overwrite it.
+If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
 ## Out of scope for this workflow
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/init.md b/plugins/asta-preview/skills/research-step/workflows/init.md
index 4df19c0..fd11be3 100644
--- a/plugins/asta-preview/skills/research-step/workflows/init.md
+++ b/plugins/asta-preview/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
+After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
 ## Preconditions
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/plan.md b/plugins/asta-preview/skills/research-step/workflows/plan.md
index 06ae941..a000e2d 100644
--- a/plugins/asta-preview/skills/research-step/workflows/plan.md
+++ b/plugins/asta-preview/skills/research-step/workflows/plan.md
@@ -1,89 +1,92 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. The flow chains live in `assets/schemas.yaml` (`flows`) — plan reads them, it does not hardcode the sequence. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet: pick a flow and lay its first step(s).
+- **replan** — an epic exists: after a step closes, add the next step(s) in its flow chain.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized (else run **init**).
+- **bootstrap**: `mission.md` exists and no epic exists yet (`scripts/epic-root.sh` reports `status: none`).
+- **replan**: an epic exists (`scripts/epic-root.sh` reports `status: found`); either `execute` supplied the closed source task, or the user named what to extend.
 
-## Issue metadata convention
+## Task metadata
 
-Every task issue carries:
+Create task leaves with `scripts/create-task.sh <parent> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]`. It sets `metadata.research_step = {flow, task_type, inputs, output_schema_version, output_json: null, output_markdown: null}` and a **brief one-line `description`** (it rejects a missing, multi-line, or over-long description). `execute` later publishes `output_json` (the structured result) and `output_markdown` (the narrative) via `close-task.sh`; the description is not overwritten. The epic carries `epic_root: true`; group nodes (loops, fan-outs, branches) are epics created with `bd create --parent <parent> -t epic` (no task_type, no description rules). A session may run several flows — the flow is per task, not per epic.
 
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
+## Indentation is the tree
 
-The mission epic additionally carries `epic_root: true`.
+The flow in `assets/schemas.yaml` is an indented outline, and the beads graph you build **is that same outline**: each indentation level in the flow becomes one parent-child level in beads. Build it with `bd create --parent`, walking the flow top-down, so hierarchical ids (`wf`, `wf.1`, `wf.1.1`, …) encode the outline position. There are **no `blocks`/`deps` edges** — ordering is the id order, because you create nodes in the order they run.
 
-## Mode selection
+Reading a flow node:
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
-## Bootstrap mode
+The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks):
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
-   ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
-   bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
-   ```
-3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
-4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
-5. **Report.** Print the epic ID and the created task IDs.
+```
+wf                      [epic]    <mission>
+ wf.1                   [loop]    reproduction
+  wf.1.1                          data_driven_discovery
+  wf.1.2                          law_extraction
+  wf.1.3                          evidence_gathering
+  wf.1.4                [fan-out] replication            one branch per law
+   wf.1.4.1             [branch]  <law>
+    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.2                    analysis
+    wf.1.4.1.3                    reproduction_audit
+    wf.1.4.1.4                    reproduce
+   wf.1.4.2             [branch]  <law> …
+  wf.1.5                          reproduction_synthesis
+```
 
-## Replan mode
+The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproduction, `wf.3` theorizer, `wf.4` verification (one branch per testable theory), `wf.5` verification_synthesis, `wf.6` gap_synthesis, `wf.7` final_synthesis. Each sub-flow ends in its own synthesis step that emits a report (provenance_report, reproduction_report, theory_report, verification_report); gap_synthesis aggregates their gaps into data_gaps_report and final_synthesis writes the theory-led research_report.
 
-Read the source task's task_type and output:
+## Ordering and closing (no edges)
 
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+- **Next task = the open issue with a `task_type` and the smallest id.** Compare dotted id segments numerically, so `wf.1.2` precedes `wf.1.10`. Groups (no `task_type`) are never executed.
+- Because you create nodes in execution order, earlier steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent, so any order among them is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
+
+## Static vs data-dependent fan-outs
+
+- **Static** (`theory_generation` by objective): both branches are known up front → create them together.
+- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+
+## Gates (replan)
 
-Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
+- When `reproduction_design` closes: `feasibility` of `feasible`/`proxy_only` → create `analysis`, `reproduction_audit`, `reproduce` under that branch; `data_unavailable`/`construct_mismatch` → create only `reproduce` (it records the law `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created.
+- When `testability_triage` closes: create a `verification` branch only for each theory listed in `testable_theory_ids`; the remaining theories become `next_steps` in the final report.
 
-- **Next step:** create the node(s) the diagram points to. Set `inputs` from the row, a `blocks` edge from each, and `parent-child` to the epic.
-- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
-- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed at creation (see below), so also create the step that follows each one — otherwise nothing is left for `execute`. Keep creating whatever just unblocked until the frontier needs an `execute` pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
+## Bootstrap
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
+2. Create the root epic from the mission with `bd create -t epic`, tagged `epic_root: true` plus the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
 
-### Auto-resolving hypothesis tasks
+## Replan
 
-When creating a `hypothesis` from a `literature_review` gap or an `auto_discovery` finding — its claim is already stated, so there's no separate `execute` pass, but it still produces `output.json` and `output.md` on disk like any task:
+When a step closes, create the next node(s) under their parent, in flow order:
 
-1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why the source implies the claim (for a finding, cite its node id)
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
+- For a fan-out group: create one branch epic per item with `bd create --parent <group> -t epic`, then create the group's steps under each branch via `create-task.sh` (for any branch the data can't support, record why rather than skipping it).
+- Apply the **Gates** rules above.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**. There are no edges to verify — the parent-child tree is the whole structure.
 
-## Not here
+## Out of scope
 
-Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
+- Running tasks or producing outputs (**execute**).
+- Environment setup (**init**); editing `mission.md` (**brainstorm**); judging output quality.
diff --git a/plugins/asta-preview/skills/research-step/workflows/update-summary.md b/plugins/asta-preview/skills/research-step/workflows/update-summary.md
index a79f6ff..311c81a 100644
--- a/plugins/asta-preview/skills/research-step/workflows/update-summary.md
+++ b/plugins/asta-preview/skills/research-step/workflows/update-summary.md
@@ -15,11 +15,10 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** All you need to fill the template comes from a few `bd` queries:
-   - `bd list --json` for the full tree (issue_count, status partition).
-   - `bd ready --json` for the ready list (also drives the Next Steps section).
-   - `bd blocked --json` for the blocked count.
-   Project each list to `{id, task_type: .metadata.research_step.task_type, title}` with `jq` and partition by `.status`.
+3. **Gather state inline.** Everything comes from `bd list --json`:
+   - the full tree (issue_count, status partition);
+   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
+   Project to `{id, task_type: .metadata.research_step.task_type, title}` and partition by `.status`.
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
 5. **Overwrite `summary.md`** using this template:
 
@@ -61,13 +60,12 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Ready: <n> — IDs: <list>
-   - Blocked: <n>
+   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
 
    ### Next Steps
-   <from `bd ready --json`: one bullet per ready issue, formatted as
+   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If `bd ready` is empty, write "No ready tasks — graph is blocked or complete.">
+   If there are no open task issues, write "No open tasks — flow complete.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)
@@ -79,4 +77,4 @@ Any reader (human or agent) checks freshness by running `scripts/summary-check.s
 ## Out of scope for this workflow
 
 - Mutating beads. `update-summary` is read-only against `.beads/`.
-- Re-planning. Even if `bd ready` is empty and the graph is incomplete, `update-summary` does not create issues.
+- Re-planning. Even if no open tasks remain and the graph is incomplete, `update-summary` does not create issues.
diff --git a/plugins/asta/skills/research-step/SKILL.md b/plugins/asta/skills/research-step/SKILL.md
index 3735bb5..49a7fec 100644
--- a/plugins/asta/skills/research-step/SKILL.md
+++ b/plugins/asta/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. Each unit of work is a typed sub-issue whose `metadata.research_step.output` matches a JSON schema in `assets/schemas.yaml`.
+Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically a separate epic kicked off after a theory-generation run), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types`, the `tasks` (typed `input`/`output` + a common `artifacts`), and the `flows` (each step carrying its `mission` + asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -31,41 +31,12 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
-## Plan templates
-
-A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
-
-- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
-- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
-- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
-- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that and block it on each. (`schemas.yaml` is output shape only — no wiring.)
-- Don't add tasks the template doesn't have.
-
-Available templates:
-
-| Name | Purpose |
-|---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
-| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
-
-### Task outputs
-
-Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
-
-| Path | Role |
-|---|---|
-| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
-| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
-| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
-
-Cross-task references in `output.json` use the absolute run-root-relative path; inside `output.md`, use the file-relative link form so the page renders standalone.
-
 ## Routing
 
 ### 1. Honor explicit requests
@@ -80,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta/skills/research-step/assets/schemas.yaml b/plugins/asta/skills/research-step/assets/schemas.yaml
index 888db1b..b9643b3 100644
--- a/plugins/asta/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta/skills/research-step/assets/schemas.yaml
@@ -1,82 +1,436 @@
-# Output shapes for research-step tasks. Each task stores its output at
-# metadata.research_step.output, matching the shape under `output:` for its type.
-# Wiring (which task feeds which) lives in the templates, not here.
-
-schema_version: 1
-
-task_types:
-
-  scope:
-    output:
-      question: string                   # the precise research question
-      boundaries: [string]               # what is in / out of scope
-      success_criteria: [string]         # how we know we have answered it
-
-  definitions:
-    output:
-      terms:
-        - name: string
-          operational_definition: string
-          rationale: string
-
-  literature_review:
-    output:
-      summary_path: string               # relative path; long-form context
-      key_findings: [string]             # 3-10 bullets readable without opening summary_path
-      gaps: [string]                     # gaps that motivate hypotheses
-      citations:
-        - id: string
-          title: string
-          url: string
-          relevance: string
-
-  hypothesis:
-    output:
-      statement: string                  # H_n: ...
-      rationale: string
-      falsifiable_prediction: string
-      expected_evidence: [string]
-
-  experiment_design:
-    output:
-      method: string
-      procedure: [string]                # ordered steps
-      variables:
-        independent: [string]
-        dependent: [string]
-        controls: [string]
-      artifacts_expected: [string]       # paths the gathering step will produce
-
-  evidence_gathering:
-    output:
-      artifacts:
-        - path: string
-          kind: string                   # data | log | figure | code | other
-          description: string
-      log_path: string                   # what was actually run
-      deviations: [string]               # ways execution diverged from design
+version: 1
 
-  auto_discovery:
-    output:
-      runid: string                      # the AutoDS run (created or imported)
-      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
-      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
-      surprising_nodes:
-        - id: string                     # e.g. node_3_0
-          surprise: number
-          finding: string
+enums:
+  outcome:               [held, partial, failed, n/a]
+  testability:           [tested, proxy_only, untestable]
+  construct_equivalence: [equivalent, proxy, mismatch]
+  feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
+  independence_axis:     [region, instrument, method, construct, temporal, population]
+  generation_objective:  [accuracy_focused, novelty_focused]
+  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  novelty:               [established, derivable, genuinely_new]
+  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  priority:              [high, medium, low]
+  access_status:         [acquired, open_unfetched, restricted, not_found]
+  holdout_verdict:       [held, failed, untested]
+
+types:
+
+  artifact:
+    artifactId: string
+    name: string
+    description: string
+    parts: [object]
+    metadata: object
+
+  experiment:
+    experiment_id: string
+    status: string
+    hypothesis: string
+    analysis: string
+
+  empirical_law:
+    id: string
+    statement: string
+    construct: string
+    source_operationalization: string
+    source_node: string
+    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    grouping_rationale: string
+    outcome: outcome                       # held | partial | failed | n/a
+    testability: testability               # tested | proxy_only | untestable
+    independence_axes: [independence_axis]
+    effect_size_source: string
+    effect_size_reproduction: string
+    replication_path: string
+
+  dataset:
+    id: string
+    definition: string
+    source: string
+    n: number
+    sampling: string
+    variables: [string]
+    covers_laws: [string]
+
+  data_source:                       # links a run dataset to the paper and repository it came from
+    id: string
+    dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
+    paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
+    paper_title: string
+    paper_url: string
+    data_availability: string        # the paper's data-availability statement, verbatim or summarized
+    repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
+    identifier: string               # DOI / accession / direct URL for the data
+    access_status: access_status     # acquired | open_unfetched | restricted | not_found
+    local_path: string               # repo-root-relative path once acquired (else empty)
+    covers_laws: [string]
+
+  cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
+    id: string
+    research_question: string        # the intent the discovery runs against (from mission.md)
+    inclusion_criteria: string
+    exclusion_criteria: string
+    sampling: string
+    source_data_sources: [string]    # data_source ids the cohort was assembled from
+    discovery_subset: {definition: string, n: number, path: string}   # what discovery sees
+    holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
+    run_id: string                   # the stood-up auto-ds run (autodiscovery create)
+
+  reproduction_design:
+    law_id: string
+    experiment_name: string
+    plain_language_description: string
+    original_operationalization: string
+    independent_operationalization: string
+    construct_equivalence: construct_equivalence
+    feasibility: feasibility
+    required_data: string
+    data_gap: string
 
   analysis:
-    output:
-      verdict: enum [supported, refuted, inconclusive]
-      confidence: number                 # 0.0 - 1.0
-      reasoning: string
-      caveats: [string]
-
-  synthesis:
-    output:
-      answer: string                     # answer to scope.question
-      supporting_hypotheses: [bd_id]
-      refuted_hypotheses: [bd_id]
-      open_questions: [string]           # become discovered-from edges on re-plan
-      report_path: string                # generated markdown report
+    final_answer: string
+    assumptions: [string]
+    figures: [{caption: string, image: string}]
+    code: string
+
+  audit_report:
+    subject_id: string                     
+    analysis_id: string
+    challenges: [{concern: string, check: string, outcome: string}]
+    artifacts_found: [string]
+    verdict_survives: boolean
+    recommended_adjustment: string
+
+  extracted_data:
+    id: string
+    run_id: string
+    paper_id: string
+    extraction_schema_id: string
+    rows:
+      - name_short: string
+        name_full: string
+        brief_description: string
+        citation_title: string
+        uuid: string
+
+  theory:
+    id: string
+    name: string
+    description: string
+    theory_query: string
+    objective: generation_objective
+    grounds_law_ids: [string]
+    supporting_evidence_ids: [string]
+    components:
+      theory_statements:
+        - statement_name: string
+          theory_statement: string
+          supporting_evidence: [{text: string, uuids: [string]}]
+          conflicting_evidence: [{text: string, uuids: [string]}]
+      new_predictions_likely: [string]
+      new_predictions_unknown: [string]
+      unaccounted_for: [{text: string, uuids: [string]}]
+
+  testability_triage:
+    assessments:
+      - theory_id: string
+        testable_now: boolean
+        available_data: string
+        required_data: string
+        proposed_test: string
+        gap: string
+    testable_theory_ids: [string]
+
+  theory_evaluation:
+    id: string
+    theory_id: string
+    novelty: novelty
+    overall_support_or_contradict: string
+    overall_support_or_contradict_explanation: string
+
+  verification:
+    theory_id: string
+    prediction: string
+    verdict: verification_verdict
+    effect_size: string
+    data_used: string
+    audit_survived: boolean
+    analysis_id: string
+
+  next_run_proposal:
+    kind: next_step_kind
+    title: string
+    tests: [string]
+    data_needed: string
+    expected_signature: string
+    priority: priority
+
+  # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
+  # theory_report, verification_report), one standalone data-gaps report, and a
+  # theory-led master (research_report). Each carries report_path (the .md deliverable
+  # written first), a title, a one-line headline, a typed body, and `links` back to the
+  # artifacts, tasks, and papers it rests on. Each sub-flow report exposes a local
+  # `gaps` list that gap_synthesis aggregates into the data_gaps_report.
+
+  provenance_report:
+    report_path: string
+    title: string
+    headline: string
+    sources:
+      - dataset_id: string
+        paper_title: string
+        paper_url: string
+        repository: string
+        access_status: access_status
+        local_path: string
+    acquired: [string]
+    not_acquired: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  reproduction_report:
+    report_path: string
+    title: string
+    headline: string
+    method_note: string
+    laws_ledger:
+      - law_id: string
+        statement: string
+        outcome: outcome
+        testability: testability
+        effect_size_source: string
+        effect_size_reproduction: string
+        independence_axes: [independence_axis]
+        evidence: string
+    what_held: [string]
+    what_failed_or_untestable: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  theory_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theories:
+      - theory_id: string
+        name: string
+        objective: generation_objective
+        one_line: string
+        grounds_law_ids: [string]
+        novelty: novelty
+        testable_now: boolean
+        supporting_evidence_ids: [string]
+    novelty_summary: string
+    new_predictions: [string]
+    open_threads: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  verification_report:
+    report_path: string
+    title: string
+    headline: string
+    novelty_by_verification:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verdict: verification_verdict
+        effect_size: string
+        data_used: string
+        audit_survived: boolean
+    what_was_tested: string
+    what_could_not_be_tested: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  data_gaps_report:
+    report_path: string
+    title: string
+    headline: string
+    gaps:
+      - item: string
+        missing_data: string
+        blocks: string
+        severity: priority
+        arose_in: string
+    next_steps: [next_run_proposal]
+    links: [{label: string, ref: string}]
+
+  research_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theory_highlights:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verification: verification_verdict
+    inference_chain: [{claim: string, chain: [string]}]
+    what_was_done: [string]
+    sub_reports: [{kind: string, report_path: string, one_line: string}]
+    tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+
+  discovery_report:                  # synthesis output of the auto_discovery flow
+    report_path: string
+    title: string
+    headline: string
+    laws:
+      - law_id: string
+        statement: string
+        surprise: number             # the discovery run's surprise signal for this candidate law
+        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
+        effect_size: string
+    next_steps: [next_run_proposal]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+tasks:
+  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
+  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
+  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
+  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
+  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
+  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
+  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
+  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
+  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
+  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
+  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
+  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
+  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
+  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
+  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
+  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
+  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
+  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
+  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
+  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
+  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
+  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
+  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
+  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
+  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
+  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+
+flows:
+
+  data_and_literature_grounded_theory_generation:
+    mission: Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.
+    data_provenance:
+      mission: Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.
+      chain:
+        - {workflow: data_provenance, mission: Source the papers and datasets the run named in the mission was built on; acquire the open data and record what is restricted.}
+    reproduction:
+      mission: Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.
+      chain:
+        - {workflow: reproduction, mission: Import the run named in the mission; reproduce each law on independent data with construct-equivalence and a feasibility gate.}
+    theorizer:
+      mission: Generate literature- and data-grounded theories of the reproduced laws and score their novelty.
+      chain:
+        - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.}
+    verification:
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      analysis:
+        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_verification:
+        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+        chain: []
+    verification_synthesis:
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      chain: []
+    gap_synthesis:
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      chain: []
+    final_synthesis:
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      chain: []
+
+  data_provenance:
+    mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
+    provenance_search:
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      chain: [asta literature find, asta papers search]
+    provenance_extraction:
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    data_acquisition:
+      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      chain: [asta documents, asta autodiscovery upload]
+    provenance_synthesis:
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      chain: []
+
+  reproduction:
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch analysis, not the ingested run.
+    data_driven_discovery:
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      chain: [asta autodiscovery run, asta autodiscovery experiments]
+    law_extraction:
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      chain: []
+    evidence_gathering:
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
+    replication:
+      mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
+      reproduction_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+        chain: [asta experiment]
+      analysis:
+        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduction_audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduce:
+        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+        chain: []
+    reproduction_synthesis:
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      chain: []
+
+  theorizer:
+    mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
+    evidence_extraction:
+      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    theory_generation:
+      mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
+      theory_formation:
+        mission: Form theories from the shared extraction store under this branch's objective.
+        chain: [asta generate-theories form-theory]
+    testability_triage:
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets data_acquisition or evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      chain: []
+    novelty_assessment:
+      mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      chain: [asta generate-theories evaluate-novelty]
+    theory_synthesis:
+      mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      chain: []
+
+  auto_discovery:
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    cohort_assembly:
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
+    discovery_run:
+      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      chain: [asta autodiscovery submit, asta autodiscovery experiments]
+    replication:
+      mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      holdout_replication:
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+    discovery_synthesis:
+      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      chain: []
diff --git a/plugins/asta/skills/research-step/scripts/close-task.sh b/plugins/asta/skills/research-step/scripts/close-task.sh
new file mode 100755
index 0000000..673b23f
--- /dev/null
+++ b/plugins/asta/skills/research-step/scripts/close-task.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# close-task.sh <issue-id> <output-json> <output-markdown>
+# Publish a task's output and finish it: write output_json + output_markdown into the issue
+# metadata, validate output_json against the schema, close the issue, assert it closed, then
+# close any ancestor group whose last child just closed.
+#
+# Arguments:
+#   <issue-id>         issue to publish to and close; its parent is the id minus the last ".segment"
+#   <output-json>      file whose first JSON value is stored as research_step.output_json
+#   <output-markdown>  file whose raw text is stored as research_step.output_markdown
+# Exit: 0 closed · 1 usage or bad input file · 2 issue did not close · any other non-zero
+#       status comes from bd, jq, or validate-output.sh (set -e stops at the first failure).
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+[[ $# -eq 3 ]] || { echo "usage: close-task.sh <issue-id> <output-json> <output-markdown>" >&2; exit 1; }
+id="$1"; oj="$2"; om="$3"
+[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; }
+[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; }
+jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; }
+
+# 1. publish: merge output_json + output_markdown into the existing research_step metadata
+cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
+merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
+  '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
+# bd reads the metadata from @file; the scratch file is removed on every exit path.
+tmp="$(mktemp)"
+trap 'rm -f "$tmp"' EXIT
+printf '%s' "$merged" > "$tmp"
+bd update "$id" --metadata @"$tmp" >/dev/null
+
+# 2. validate structurally (reads the issue back; no style lint)
+bash "$here/validate-output.sh" "$id"
+
+# 3. close and 4. assert closure
+bd close "$id" >/dev/null
+[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \
+  || { echo "close-task: $id did not close" >&2; exit 2; }
+echo "closed $id"
+
+# 5. cascade: close each ancestor group whose direct children are all closed
+cur_id="$id"
+while [[ "$cur_id" == *.* ]]; do
+  parent="${cur_id%.*}"
+  bd show "$parent" --json >/dev/null 2>&1 || break
+  # count direct children only: ids of the form "<parent>.<x>" with no further "." in <x>
+  open_kids="$(bd list --json | jq --arg p "$parent" '
+    [ .[]
+      | select(.id | startswith($p + "."))
+      | select((.id[($p|length)+1:] | contains(".")) | not)
+      | select(.status != "closed") ] | length')"
+  [[ "$open_kids" -eq 0 ]] || break
+  # a group that fails to close ends the cascade: its ancestors still have an open child
+  if bd close "$parent" >/dev/null 2>&1; then
+    echo "closed group $parent"
+  else
+    echo "close-task: could not close group $parent" >&2
+    break
+  fi
+  cur_id="$parent"
+done
diff --git a/plugins/asta/skills/research-step/scripts/create-task.sh b/plugins/asta/skills/research-step/scripts/create-task.sh
new file mode 100755
index 0000000..6024cf6
--- /dev/null
+++ b/plugins/asta/skills/research-step/scripts/create-task.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# create-task.sh <parent-id> <task_type> <flow> <title> <brief-description> [input-id ...]
+# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description,
+# and initialized research_step metadata. output_json / output_markdown stay null until
+# execute publishes them via close-task.sh. Prints the new issue id.
+# Exit: 1 usage · 3 task_type unknown or schemas.yaml could not be loaded · 4 bad description
+#       (empty, multi-line, or over 200 chars); bd/jq failures propagate through set -e.
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
+parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
+
+# task_type must be a key under `tasks` in schemas.yaml. The positional parameters were shifted
+# above, so report "$task_type" (not "$2"). The helper exits 3 only for an unknown task; any
+# other failure (python3, PyYAML, or the file itself) gets its own message.
+rc=0
+python3 - "$schemas" "$task_type" <<'PY' || rc=$?
+import sys
+import yaml
+with open(sys.argv[1], encoding="utf-8") as fh:
+    d = yaml.safe_load(fh)
+sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
+PY
+if [[ "$rc" -eq 3 ]]; then
+  echo "create-task: unknown task_type '$task_type' (not in schemas.yaml)" >&2; exit 3
+elif [[ "$rc" -ne 0 ]]; then
+  echo "create-task: could not load $schemas (python3 exited $rc)" >&2; exit 3
+fi
+
+[[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
+[[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
+[[ "${#desc}" -le 200 ]]    || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; }
+
+# inputs: the remaining arguments as a JSON array of strings
+if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
+meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
+# bd reads the metadata from @file; the scratch file is removed on every exit path.
+tmp="$(mktemp)"
+trap 'rm -f "$tmp"' EXIT
+printf '%s' "$meta" > "$tmp"
+bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/plugins/asta/skills/research-step/scripts/validate-output.sh b/plugins/asta/skills/research-step/scripts/validate-output.sh
index ab46d65..af3b8f6 100755
--- a/plugins/asta/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta/skills/research-step/scripts/validate-output.sh
@@ -1,166 +1,43 @@
 #!/usr/bin/env bash
-# validate-output.sh — structural validation of a research_step output JSON.
-#
-# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
-#
-# Verifies that the JSON file:
-#   1. parses
-#   2. carries the metadata envelope
-#      ({research_step: {task_type, inputs, output_schema_version, output}})
-#   3. has every required `output.<key>` for the given <task_type> per
-#      assets/schemas.yaml (schema_version: 1)
-# If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md.
-#
-# Exit codes:
-#   0  — valid
-#   2  — JSON parse error
-#   3  — unknown task_type
-#   4  — missing required field
-#   5  — task_type mismatch with envelope
-#   6  — required output.md missing (only when [task-dir] supplied)
-#   7  — output.md empty or a stub (only when [task-dir] supplied)
-#   8  — output.md has no markdown links (only when [task-dir] supplied)
-#   9  — a named entity is unlinked (only when [task-dir] supplied)
-#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
-#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
-#           <3 embedded figures (14), a required section is missing (15)
-#
-# Structural checks only — required fields, working links, and the report's basic pieces.
+# validate-output.sh <issue-id> — structural check of a task's stored output_json.
+# Reads the issue from beads, loads assets/schemas.yaml, and checks that the top-level keys of
+# metadata.research_step.output_json are exactly tasks.<task_type>.output (incl. an artifacts array).
+# No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
 set -euo pipefail
-
-if [[ $# -lt 2 || $# -gt 3 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
-  exit 1
-fi
-
-task_type="$1"
-file="$2"
-task_dir="${3:-}"
-
-if ! jq -e . "$file" > /dev/null 2>&1; then
-  echo "validate-output: $file is not valid JSON" >&2
-  exit 2
-fi
-
-# Required output fields, mirroring assets/schemas.yaml (schema_version: 1).
-case "$task_type" in
-  scope)              required="question boundaries success_criteria" ;;
-  definitions)        required="terms" ;;
-  literature_review)  required="summary_path key_findings gaps citations" ;;
-  hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
-  experiment_design)  required="method procedure variables artifacts_expected" ;;
-  evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
-  analysis)           required="verdict confidence reasoning caveats" ;;
-  synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
-  *)
-    echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
-    exit 3
-    ;;
-esac
-
-# The envelope must carry the matching task_type so we don't validate scope JSON
-# against an analysis schema by accident.
-envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
-if [[ -z "$envelope_type" ]]; then
-  echo "validate-output: $file missing .research_step.task_type" >&2
-  exit 5
-fi
-if [[ "$envelope_type" != "$task_type" ]]; then
-  echo "validate-output: envelope task_type='$envelope_type' but expected '$task_type'" >&2
-  exit 5
-fi
-
-# Envelope shape.
-for key in inputs output_schema_version output; do
-  if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: $file missing .research_step.$key" >&2
-    exit 5
-  fi
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+# exactly one argument: the beads issue id whose stored output_json is checked
+[[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
+id="$1"
+# `|| true`: under pipefail a failed `bd show` would abort here silently; fall through to the exit-2 check instead
+rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // empty' || true)"
+[[ -n "$rs" ]] || { echo "validate-output: $id has no metadata.research_step" >&2; exit 2; }
+task_type="$(jq -r '.task_type // empty' <<<"$rs")"
+[[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
+# expected = space-separated key names from tasks.<task_type>.output; the helper exits 3 only for an unknown task
+expected="$(python3 - "$schemas" "$task_type" <<'PY'
+import yaml, sys
+d = yaml.safe_load(open(sys.argv[1]))
+t = d["tasks"].get(sys.argv[2])
+if t is None: sys.exit(3)
+print(" ".join(t["output"]))
+PY
+)" || { rc=$?; [[ "$rc" -eq 3 ]] || { echo "validate-output: could not load $schemas (python3 exited $rc)" >&2; exit 3; }; echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+
+got="$(jq -c '.output_json // empty' <<<"$rs")"
+[[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
+# every expected key must be present in output_json
+for k in $expected; do
+  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
+    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
 done
-
-# Required output fields.
-for key in $required; do
-  if ! jq -e ".research_step.output | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: missing required field 'output.$key' for task_type '$task_type'" >&2
-    exit 4
-  fi
-done
-
-# Type spot-checks for the high-leverage cases. Not exhaustive — just the
-# fields where a wrong type at this layer would silently break update-summary rendering
-# or downstream tasks.
-case "$task_type" in
-  literature_review)
-    jq -e '.research_step.output.key_findings | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.key_findings must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.gaps | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.gaps must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.citations | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.citations must be an array" >&2; exit 4; }
-    ;;
-  analysis)
-    jq -e '.research_step.output.verdict | IN("supported", "refuted", "inconclusive")' "$file" >/dev/null \
-      || { echo "validate-output: output.verdict must be one of supported|refuted|inconclusive" >&2; exit 4; }
-    jq -e '.research_step.output.confidence | type == "number" and . >= 0 and . <= 1' "$file" >/dev/null \
-      || { echo "validate-output: output.confidence must be a number in [0, 1]" >&2; exit 4; }
-    ;;
-esac
-
-# output.md document-quality gate. Every task must produce a human-readable
-# output.md (skill "Task outputs" table) that links the entities it names.
-if [[ -n "$task_dir" ]]; then
-  md="$task_dir/output.md"
-  if [[ ! -f "$md" ]]; then
-    echo "validate-output: required output.md not found at '$md'" >&2
-    exit 6
-  fi
-  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
-    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
-    exit 7
-  fi
-  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
-    echo "validate-output: output.md has no markdown links" >&2
-    exit 8
-  fi
-  # Strip links, then flag any named entity still bare in output.md / report.tex.
-  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
-    [[ -f "$f" ]] && perl -ne '
-      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
-      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
-      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
-    ' "$f"
-  done) || true
-  if [[ -n "$unlinked" ]]; then
-    echo "$unlinked" >&2
-    echo "validate-output: named entities above are unlinked" >&2
-    exit 9
-  fi
-
-  # The report's basics. Only the report node makes report.tex; when it exists,
-  # check it has what report_example.tex has. Each failure points back to it.
-  rpt="$task_dir/artifacts/report.tex"
-  if [[ -f "$rpt" ]]; then
-    ref="templates/examples/report_example.tex"
-    rfail() {
-      echo "report-gate: $1" >&2
-      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
-      echo "     its depth and citation density before retrying." >&2
-      exit "$2"
-    }
-    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
-    grep -q '\\begin{tikzpicture}\|\\includegraphics' \
-      <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
-      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
-    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
-    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
-    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
-    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
-      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
-    done
-  fi
-fi
+while IFS= read -r k; do
+  case " $expected " in *" $k "*) ;; *)
+    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
+  esac
+done < <(jq -r 'keys[]' <<<"$got")
+jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
+  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }
 
 echo "ok"
diff --git a/plugins/asta/skills/research-step/scripts/write-meta.sh b/plugins/asta/skills/research-step/scripts/write-meta.sh
deleted file mode 100755
index 6e7d71a..0000000
--- a/plugins/asta/skills/research-step/scripts/write-meta.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-# write-meta.sh — materialize a metadata JSON blob to a temp file and print
-# its path, suitable for `bd update <id> --metadata @<path>` or
-# `bd create ... --metadata=@<path>`.
-#
-# Reads JSON from stdin (or from $1 if a path is given), validates that it
-# parses, and writes it under $TMPDIR with mode 0600. The path is printed on
-# stdout so the caller can splice it into a bd command.
-#
-# Why this exists: `bd update --metadata` accepts either a JSON string or
-# `@file.json`. Inlining a JSON string requires `"$(cat /tmp/x.json)"` (a
-# non-bd shell op the SKILL.md frontmatter does not permit), and shell quoting
-# gets fragile with embedded quotes. Materializing a file once and using
-# `@path` keeps everything in `Bash(bd:*)` territory.
-set -euo pipefail
-
-tmp=$(mktemp -t research-step-meta.XXXXXX.json)
-trap 'rm -f "$tmp"' ERR
-
-if [[ $# -ge 1 ]]; then
-  cp "$1" "$tmp"
-else
-  cat > "$tmp"
-fi
-
-if ! jq -e . "$tmp" >/dev/null 2>&1; then
-  echo "write-meta: input is not valid JSON" >&2
-  rm -f "$tmp"
-  exit 2
-fi
-
-chmod 0600 "$tmp"
-echo "$tmp"
diff --git a/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md b/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
deleted file mode 100644
index 15a5875..0000000
--- a/plugins/asta/skills/research-step/templates/data_driven_theory_generation.md
+++ /dev/null
@@ -1,118 +0,0 @@
----
-name: data_driven_theory_generation
-description: |
-  See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the most promising with new experiments.
----
-
-# Data-driven theory generation
-
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  data_provenance["Data provenance"]
-  definitions --> data_provenance
-  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
-  data_provenance --> auto_discovery
-  subgraph sub1["for each of the 10 surprising findings"]
-    direction TB
-    hypothesis["Restate finding"]
-    literature_review["Literature search"]
-    experiment_design["Pre-register test"]
-    evidence_gathering["Find independent data"]
-    analysis["Replicate"]
-    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
-    analysis -- "retry: inconclusive → re-spec" --> experiment_design
-    analysis -- "retry: bad data → re-locate" --> evidence_gathering
-  end
-  auto_discovery --> hypothesis
-  replication_synthesis["Replication summary (k of 10, by mechanism)"]
-  analysis --> replication_synthesis
-  theorizer_theories["Theorizer-grounded theories"]
-  replication_synthesis --> theorizer_theories
-  novelty["Score theories for novelty"]
-  theorizer_theories --> novelty
-  subgraph sub2["for each of the top 3 theories"]
-    direction TB
-    followon_exp_design["Pre-register experiment (AED)"]
-    followon_evidence["Find new data"]
-    followon_analysis["Run, or leave as a proposal"]
-    followon_exp_design --> followon_evidence --> followon_analysis
-  end
-  novelty --> followon_exp_design
-  report["Closing report"]
-  followon_analysis --> report
-  report --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
-| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
-| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
-| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
-| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
-| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
-| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
-| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
-| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
-| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
-| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
-
-The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
-
-## Running DataVoyager
-
-Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only call data `data_unavailable` once the ledger shows the trace failed — a 403/404 on someone else's resource isn't proof — then leave that `analysis` blocked, not closed.
-
-A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim:
-
-| What DataVoyager did | Go back to | Fix |
-|---|---|---|
-| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
-| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
-| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
-
-## mission.md
-
-- `run_pointer:` — the AutoDS run to import (omit to create one).
-- `datasets[]` — input dataset URIs for a new run.
-- A focus statement in the body — the question under study.
-
-Unless the user explicitly says to use local inputs only, fetch external public data for replication.
-
-## Writing the report and outputs
-
-These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
-
-- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
-- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
-- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text or `node_3_0`:
-
-  | thing | link to |
-  |---|---|
-  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
-  | paper | the asta document, paper URL, or `data_provenance` entry |
-  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
-  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
-  | dataset | the file under `inputs/`, or the Datasets appendix |
-  | experiment E-number | its appendix entry |
-
-- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
-- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
-- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/plugins/asta/skills/research-step/templates/examples/report_example.tex b/plugins/asta/skills/research-step/templates/examples/report_example.tex
deleted file mode 100644
index e87ebf5..0000000
--- a/plugins/asta/skills/research-step/templates/examples/report_example.tex
+++ /dev/null
@@ -1,620 +0,0 @@
-% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
-%
-% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
-% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
-%
-% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
-% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
-% your run embeds its own figures from `artifacts/`.
-
-\documentclass[11pt]{article}
-\usepackage[margin=1in]{geometry}
-\usepackage{amsmath}
-\usepackage{amssymb}
-\usepackage{hyperref}
-\usepackage{booktabs}
-\usepackage{longtable}
-\usepackage{array}
-\usepackage{enumitem}
-\usepackage{xcolor}
-\usepackage{microtype}
-\usepackage{graphicx}
-\usepackage{titling}
-\usepackage{fancyhdr}
-\usepackage{titlesec}
-\usepackage{tabularx}
-\usepackage{tikz}
-\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
-\definecolor{paperfinderpurple}{HTML}{6D28D9}
-
-\hypersetup{
-  colorlinks=true,
-  linkcolor=blue!55!black,
-  urlcolor=blue!55!black,
-  citecolor=blue!55!black,
-}
-
-\pagestyle{fancy}
-\fancyhf{}
-\fancyhead[L]{Multi-Agent Computational Investigation}
-\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
-\fancyfoot[C]{\thepage}
-\renewcommand{\headrulewidth}{0.4pt}
-
-\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
-\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
-
-\setlength{\parskip}{0.5em}
-
-\begin{document}
-
-\begin{titlepage}
-\thispagestyle{empty}
-\vspace*{0.6in}
-\begin{center}
-{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
-\end{center}
-
-\vspace*{40pt}
-
-\noindent\makebox[\textwidth][c]{%
-\begin{tikzpicture}[
-  font=\footnotesize,
-  procbox/.style={
-    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  agentbox/.style={
-    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  paperbox/.style={
-    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  finalbox/.style={
-    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
-  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
-  node distance=0.45cm and 0.45cm,
-]
-% Band 1: discovery phase, left-to-right
-\node[procbox] (scope) {Scope \&\\Definitions};
-\node[procbox, right=of scope] (prov) {Data\\Provenance};
-\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
-\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
-\node[procbox, right=of laws] (themes) {Cluster\\Themes};
-
-% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
-\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
-\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
-\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
-\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
-\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
-
-\begin{scope}[on background layer]
-\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
-\end{scope}
-\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
-
-% Band 3: synthesis + follow-on, left-to-right
-\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
-\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
-\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
-\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
-\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
-
-\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
-
-% Band 1 arrows
-\draw[arr] (scope) -- (prov);
-\draw[arr] (prov) -- (ad);
-\draw[arr] (ad) -- (laws);
-\draw[arr] (laws) -- (themes);
-
-% Band 1 -> Band 2: straight down themes -> lit
-\draw[arr] (themes) -- (lit);
-
-% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
-\draw[arr] (lit) -- (hyp);
-\draw[arr] (hyp) -- (evid);
-\draw[arr] (evid) -- (exp);
-\draw[arr] (exp) -- (rep);
-
-% Retry self-loop on rep (black so it reads clearly)
-\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
-
-% Band 2 -> Band 3: straight down rep -> across
-\draw[arr] (rep) -- (across);
-
-% Band 3 arrows
-\draw[arr] (across) -- (theo);
-\draw[arr] (theo) -- (nov);
-\draw[arr] (nov) -- (aed);
-\draw[arr] (aed) -- (dv2);
-
-% Band 3 -> final report
-\draw[arr] (dv2) -- (rep_final);
-
-% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
-\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
-\end{tikzpicture}%
-}
-
-\vspace*{\fill}
-\begin{center}
-\footnotesize\itshape\color{gray!50!black}
-Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
-\end{center}
-\end{titlepage}
-
-\tableofcontents
-\newpage
-
-%---------------------------------------------------------------
-\section{Mission}
-
-This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
-
-We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
-
-The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
-
-%---------------------------------------------------------------
-\section{Abstract}
-
-We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
-
-\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
-\begin{itemize}[noitemsep]
-\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
-\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
-\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
-\end{itemize}
-
-\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
-
-\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
-
-%---------------------------------------------------------------
-\section{Background and Motivation}
-
-\subsection{The Pakistan WPV1 resurgence}
-
-Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
-\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
-\label{fig:national}
-\end{figure}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
-\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
-\label{fig:district}
-\end{figure}
-
-\subsection{The older-cohort hypothesis}
-
-Two lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
-
-\subsection{Prior AutoDiscovery findings}
-
-Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) curated into six candidate cross-cutting laws:
-\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
-\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
-\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
-\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
-\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
-\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
-\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
-\end{description}
-
-These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
-
-%---------------------------------------------------------------
-\section{Methods}
-\label{sec:methods}
-
-\subsection{Data sources}
-
-The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
-
-\begin{itemize}[noitemsep]
-\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
-\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
-\item Pakistan Standards of Living Measurement Survey (PSLM) 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
-\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
-\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
-\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
-\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
-\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
-\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
-\end{itemize}
-
-\subsection{Computational agents and their roles}
-
-\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
-
-\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
-
-\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. Includes a calibrated novelty assessment producing law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
-
-\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
-
-\subsection{AutoDiscovery curation and replication design}
-\label{sec:methods_ad}
-
-The four AD runs together completed 121 successful experiments. We curated cross-cutting findings at $|$surprisal$|$ $\geq 0.27$ or the system's intrinsic surprise flag set to true, grouping into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal magnitude of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
-
-For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
-
-\subsection{Cross-source robustness experiments}
-
-Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
-
-\subsection{Theorizer runs}
-
-Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
-
-\subsection{AutoExperimentDesigner follow-on protocols}
-
-After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
-
-For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
-
-\subsection{Statistical procedures}
-
-\begin{itemize}[noitemsep]
-\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
-\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
-\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
-\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
-\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Results}
-
-The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
-\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
-\label{fig:matrix}
-\end{figure}
-
-\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
-
-The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
-
-The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
-E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
-E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L1.}
-\end{table}
-
-\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
-
-\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
-
-L2 was tested in four experiments (E3, E4, E8, and E9) and was refuted in all four.
-
-In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
-
-Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
-\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} Districts with at least one case of either WPV1 or cVDPV2. Marker size encodes total case volume; color encodes the same. The downward OLS slope indicates cVDPV2 (low ratio) concentrates in adult-heavy districts.}
-\label{fig:subtype}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
-E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
-E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
-E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L2.}
-\end{table}
-
-\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
-
-\subsection{Law L3 --- Two-regime household contact intensity}
-
-L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient is significant at $p{=}0.0006$ with the sign indicating the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produces $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis are recovered.
-
-\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
-
-\subsection{Law L4 --- Cross-border mobility mechanism}
-
-L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is strongly supported by three independent experiments:
-
-\begin{itemize}[noitemsep]
-\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
-\item Experiment E13 fit a district-year Poisson with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant at coefficient 1.75, $p{=}0.079$.
-\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
-\end{itemize}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
-\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan after which cross-border coupling intensified per Experiment E11.}
-\label{fig:pakafg}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
-E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
-E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
-E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L4.}
-\end{table}
-
-\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
-
-\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
-
-L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model failed to reject the Pol3-only model ($p{=}0.383$).
-
-The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
-
-\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
-
-%---------------------------------------------------------------
-\section{Pre-Registered Confirmatory Test of the Combined Theory}
-\label{sec:final}
-
-\subsection{Background and rationale}
-
-The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
-
-This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
-
-\subsection{AutoExperimentDesigner pre-registered protocol}
-
-The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
-
-\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the year in which Pol3 first crosses 80\%.
-
-\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
-
-\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
-
-\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
-
-\subsection{DataVoyager execution}
-
-The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
-
-A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
-
-\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
-
-The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
-
-\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
-
-In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
-
-The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at $c{=}85$\%. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
-
-\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
-
-At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
-
-\subsection{Combined verdict}
-
-All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Component & Key statistic & Status \\
-\midrule
-P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
-P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
-P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
-Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
-\bottomrule
-\end{tabular}
-\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
-\end{table}
-
-%---------------------------------------------------------------
-\section{Trustworthiness Analysis}
-
-\subsection{What we can trust}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
-\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
-\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
-\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
-\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
-\end{itemize}
-
-\subsection{Key limitations}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
-\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
-\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
-\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
-\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
-\item \textbf{Theorizer corpus selection.} The PaperFinder corpus heavily indexed on programmatic reports and Pakistan-Afghanistan phylogenetic studies. The foundational immunological mucosal-immunity papers were less weighted. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
-\end{itemize}
-
-\subsection{Deviations from protocol}
-
-\begin{itemize}[noitemsep]
-\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
-\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Conclusions}
-
-\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
-
-\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
-
-\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
-
-\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is at least 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
-
-\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
-
-%---------------------------------------------------------------
-\section{Future Directions}
-
-\begin{enumerate}[noitemsep]
-\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
-\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
-\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
-\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
-\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
-\end{enumerate}
-
-%---------------------------------------------------------------
-\appendix
-
-\section{Computational Experiment Catalogue}
-\label{app:experiments}
-
-This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
-
-\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
-
-\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
-
-\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
-
-\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level (n=5), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
-
-\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
-
-\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15-49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was statistically insignificant. Verdict: inconclusive (underpowered).
-
-\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
-
-\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log(\text{ES}+0.5)/(\text{cases}+0.5)$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
-
-\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
-
-\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
-
-\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
-
-\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
-
-\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
-
-\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
-
-\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
-
-\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
-
-\section{Datasets}
-\label{app:datasets}
-
-\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
-\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
-
-\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
-
-\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
-
-\item[D4.] \textbf{Pakistan Standards of Living Measurement Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
-
-\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
-
-\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
-
-\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
-
-\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
-
-\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
-
-\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
-\end{description}
-
-\section{References}
-
-The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
-
-\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
-\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
-
-\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
-
-\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
-
-\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
-
-\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
-
-\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
-
-\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
-
-\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
-
-\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
-
-\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
-
-\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
-
-\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
-
-\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
-
-\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
-
-\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
-
-\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
-
-\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
-\end{description}
-
-\end{document}
diff --git a/plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md b/plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md
deleted file mode 100644
index acaa800..0000000
--- a/plugins/asta/skills/research-step/templates/examples/theorizer_mission_example.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# Example theorizer mission statement
-
-This is a worked example of the **mission statement** passed to the theorizer in the
-`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
-run's `mission.md`; it is the prompt the theorizer receives once the per-theme
-reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
-and the per-theme findings.
-
-A well-formed theorizer mission does five things, and this example shows all five:
-
-1. **States the question** in one sentence, naming the phenomenon and the population of interest.
-2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
-3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
-4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
-5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
-
-Tagging each finding/question/constraint with its supporting experiment is what keeps
-the returned theories anchorable: downstream, `theorizer_theories` drops any theory
-without ≥1 law anchor, and this structure makes the anchor explicit.
-
----
-
-```
-Mission: Generate theories that explain the role of populations aged 5+ years in
-Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
-findings and the open questions they leave unresolved.
-
-SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
-  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
-      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
-  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
-      strengthening significantly after 2021 (X2).
-  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
-      by under-5 population share, with under-5 share dominating 15-64 working-age
-      share (T2 retry-1).
-  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
-      dominates in adult-heavy districts (X4, p<0.001).
-  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
-      scale (T5 retry-0/1).
-  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
-      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
-      (X7).
-
-OPEN QUESTIONS (theories should address at least one):
-  Q1. What replaced national Pol3 coverage as the dominant transmission lever
-      after 2018-2019?
-  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
-      drives the case coupling intensification?
-  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
-      in young districts) appear?
-  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
-      they are NOT the dominant district-level predictor but ARE plausibly the
-      operative mobility vectors?
-
-CONSTRAINTS (refuted framings to avoid):
-  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
-      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
-  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
-      refuted at district by T2, at province by T2 retry-4, on silent-transmission
-      signature by X3, and on subtype contrast by X4.
-  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
-      by T5 retry-0/1.
-  C4. Theories centered on resident Afghan refugee populations as a static mobility
-      channel — refuted by X7.
-
-REWARDED FRAMINGS:
-  R1. Theories that explain the 2018-2019 break date in terms of immunological,
-      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
-  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
-      deportations, seasonal transit) consistent with the post-2021 intensification.
-  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
-      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
-      while WPV1 retains a pediatric profile.
-  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
-      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
-  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
-      (mobility) — the two laws DV reproduced.
-```
diff --git a/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md b/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
deleted file mode 100644
index eb3c847..0000000
--- a/plugins/asta/skills/research-step/templates/hypothesis_driven_research.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-name: hypothesis_driven_research
-description: |
-  Literature-grounded hypothesis generation. Survey the literature, raise a
-  hypothesis per gap, test each, and write a closing report.
----
-
-# Hypothesis-driven research
-
-Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  lit_review["Literature review"]
-  definitions --> lit_review
-  subgraph sub1["for each gap"]
-    direction TB
-    hypothesis["Hypothesis"]
-    experiment_design["Experiment design"]
-    evidence_gathering["Evidence gathering"]
-    analysis["Analysis"]
-    hypothesis --> experiment_design --> evidence_gathering --> analysis
-  end
-  lit_review --> hypothesis
-  closing["Closing synthesis"]
-  analysis --> closing
-  closing --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | One line: the question under study. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
-| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]` — one hypothesis per gap. | `asta-preview:find-literature` |
-| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
-| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Get the verdict from DataVoyager (`asta analyze-data submit`), framed on the hypothesis with the gathered data. It must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
-| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
-
-The `hypothesis` tasks are filled and closed at creation from the literature gaps — see plan.md.
diff --git a/plugins/asta/skills/research-step/workflows/brainstorm.md b/plugins/asta/skills/research-step/workflows/brainstorm.md
index 884f48f..250ba36 100644
--- a/plugins/asta/skills/research-step/workflows/brainstorm.md
+++ b/plugins/asta/skills/research-step/workflows/brainstorm.md
@@ -25,7 +25,7 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so **plan** can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
@@ -41,10 +41,10 @@ Pick the branch that matches; do not run more than one.
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
-| Single issue's full `metadata.research_step.output` | `bd show <id> --json`                                                                                  |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list`                                                                                              |
-| Dependency structure | `bd dep tree <epic-id> --direction up`|
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output.summary_path` referenced from the digest                         |
+| Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
+| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
+| Task tree | `bd list --json` — ids encode the parent-child outline |
+| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
 | Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
diff --git a/plugins/asta/skills/research-step/workflows/execute.md b/plugins/asta/skills/research-step/workflows/execute.md
index 3d1a84f..a8596e2 100644
--- a/plugins/asta/skills/research-step/workflows/execute.md
+++ b/plugins/asta/skills/research-step/workflows/execute.md
@@ -1,35 +1,33 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
 
 ## Preconditions
 
 - An epic root exists (`scripts/epic-root.sh` prints `status: found`).
-- `bd ready --json` is non-empty, **or** the caller supplied a specific task ID that is currently `open` and unblocked.
+- An open issue with a `task_type` exists, **or** the caller supplied a specific `open` task ID.
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user.
+1. **Pick a task.** If a task ID was supplied, use it. Otherwise pick the **open issue that has a `task_type` and the smallest hierarchical id**: run `bd list --json`, keep issues with `status == open` and `metadata.research_step.task_type != null`, sort by id, and take the first. Compare ids segment by segment as numbers, not as plain strings, so `wf.1.10` sorts after `wf.1.9`. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
 2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
+3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
+4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
+7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
 
-   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
-6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so the `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the wrapper and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
+## Notes on output
 
-## Notes on output files
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/`, where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, and are referenced by repo-root-relative path: `.asta/<agent>/<slug>/<file>` for artifacts, and `inputs/<path>` for repo files such as the auto-ds inputs.
 
-Schema fields ending in `_path` are relative paths. Conventions:
+Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `summary_path` (from `literature_review`) → `background_knowledge.txt` by convention, but any path works.
-- `log_path` (from `evidence_gathering`) → typically under `logs/`.
-- `report_path` (from `synthesis`) → typically `report.md`.
+- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
 
-Write the file before setting the output JSON. If the executor crashes between writing the file and closing the issue, the file is harmless orphan data — re-running `execute` on the same issue will overwrite it.
+If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
 ## Out of scope for this workflow
 
diff --git a/plugins/asta/skills/research-step/workflows/init.md b/plugins/asta/skills/research-step/workflows/init.md
index 4df19c0..fd11be3 100644
--- a/plugins/asta/skills/research-step/workflows/init.md
+++ b/plugins/asta/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
+After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
 ## Preconditions
 
diff --git a/plugins/asta/skills/research-step/workflows/plan.md b/plugins/asta/skills/research-step/workflows/plan.md
index 06ae941..a000e2d 100644
--- a/plugins/asta/skills/research-step/workflows/plan.md
+++ b/plugins/asta/skills/research-step/workflows/plan.md
@@ -1,89 +1,92 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. The flow chains live in `assets/schemas.yaml` (`flows`) — plan reads them, it does not hardcode the sequence. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet: pick a flow and lay its first step(s).
+- **replan** — an epic exists: after a step closes, add the next step(s) in its flow chain.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized (else run **init**).
+- **bootstrap**: `mission.md` exists and no epic exists yet (`scripts/epic-root.sh` prints `status: none`).
+- **replan**: an epic exists; either `execute` supplied the closed source task, or the user named what to extend.
 
-## Issue metadata convention
+## Task metadata
 
-Every task issue carries:
+Create task leaves with `scripts/create-task.sh <parent> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]`. It sets `metadata.research_step = {flow, task_type, inputs, output_schema_version, output_json: null, output_markdown: null}` and a **brief one-line `description`** (it rejects a missing, multi-line, or over-long description). `execute` later publishes `output_json` (the structured result) and `output_markdown` (the narrative) via `close-task.sh`; the description is not overwritten. The epic carries `epic_root: true`; group nodes (loops, fan-outs, branches) are epics created with `bd create --parent <parent> -t epic` (no task_type, no description rules). A session may run several flows — the flow is per task, not per epic.
 
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
+## Indentation is the tree
 
-The mission epic additionally carries `epic_root: true`.
+The flow in `assets/schemas.yaml` is an indented outline, and the beads graph you build **is that same outline**: each indentation level in the flow becomes one parent-child level in beads. Build it with `bd create --parent`, walking the flow top-down, so hierarchical ids (`wf`, `wf.1`, `wf.1.1`, …) encode the outline position. There are **no `blocks`/`deps` edges** — ordering is the id order, because you create nodes in the order they run.
 
-## Mode selection
+Reading a flow node:
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
-## Bootstrap mode
+The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks):
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
-   ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
-   bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
-   ```
-3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
-4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
-5. **Report.** Print the epic ID and the created task IDs.
+```
+wf                      [epic]    <mission>
+ wf.1                   [loop]    reproduction
+  wf.1.1                          data_driven_discovery
+  wf.1.2                          law_extraction
+  wf.1.3                          evidence_gathering
+  wf.1.4                [fan-out] replication            one branch per law
+   wf.1.4.1             [branch]  <law>
+    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.2                    analysis
+    wf.1.4.1.3                    reproduction_audit
+    wf.1.4.1.4                    reproduce
+   wf.1.4.2             [branch]  <law> …
+  wf.1.5                          reproduction_synthesis
+```
 
-## Replan mode
+The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproduction, `wf.3` theorizer, `wf.4` verification (one branch per testable theory), `wf.5` verification_synthesis, `wf.6` gap_synthesis, `wf.7` final_synthesis. Each sub-flow ends in its own synthesis step that emits a report (provenance_report, reproduction_report, theory_report, verification_report); gap_synthesis aggregates their gaps into data_gaps_report and final_synthesis writes the theory-led research_report.
 
-Read the source task's task_type and output:
+## Ordering and closing (no edges)
 
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+- **Next task = the open issue with a `task_type` and the smallest id** (ids are compared segment by segment as numbers, so `wf.1.10` follows `wf.1.9`). Groups (no `task_type`) are never executed.
+- Because you create in execution order, sequential steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
+
+## Static vs data-dependent fan-outs
+
+- **Static** (`theory_generation` by objective): both branches are known up front → create them together.
+- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+
+## Gates (replan)
 
-Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
+- When `reproduction_design` closes: if `feasibility` is `feasible` or `proxy_only`, create `analysis`, `reproduction_audit`, and `reproduce` under that branch. If it is `data_unavailable` or `construct_mismatch`, create only `reproduce` (it records the law as `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task for the gap, both under that same branch. No analysis is created in that case.
+- When `testability_triage` closes: create one `verification` branch per theory in `testable_theory_ids`, and none for the others; the remaining theories become `next_steps` in the final report.
 
-- **Next step:** create the node(s) the diagram points to. Set `inputs` from the row, a `blocks` edge from each, and `parent-child` to the epic.
-- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
-- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed at creation (see below), so also create the step that follows each one — otherwise nothing is left for `execute`. Keep creating whatever just unblocked until the frontier needs an `execute` pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
+## Bootstrap
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
+2. `bd create -t epic` the root from the mission, tagged `epic_root: true` + the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
 
-### Auto-resolving hypothesis tasks
+## Replan
 
-When creating a `hypothesis` from a `literature_review` gap or an `auto_discovery` finding — its claim is already stated, so there's no separate `execute` pass, but it still produces `output.json` and `output.md` on disk like any task:
+When a step closes, create the next node(s) under their parent, in flow order:
 
-1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why the source implies the claim (for a finding, cite its node id)
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
+- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the group's steps under each via `create-task.sh` (record why for any branch the data can't support, rather than skipping it).
+- Apply the **Gates** rules above.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**. There are no edges to verify — the parent-child tree is the whole structure.
 
-## Not here
+## Out of scope
 
-Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
+- Running tasks or producing outputs (**execute**).
+- Environment setup (**init**); editing `mission.md` (**brainstorm**); judging output quality.
diff --git a/plugins/asta/skills/research-step/workflows/update-summary.md b/plugins/asta/skills/research-step/workflows/update-summary.md
index a79f6ff..311c81a 100644
--- a/plugins/asta/skills/research-step/workflows/update-summary.md
+++ b/plugins/asta/skills/research-step/workflows/update-summary.md
@@ -15,11 +15,10 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** All you need to fill the template comes from a few `bd` queries:
-   - `bd list --json` for the full tree (issue_count, status partition).
-   - `bd ready --json` for the ready list (also drives the Next Steps section).
-   - `bd blocked --json` for the blocked count.
-   Project each list to `{id, task_type: .metadata.research_step.task_type, title}` with `jq` and partition by `.status`.
+3. **Gather state inline.** Everything comes from `bd list --json`:
+   - the full tree (issue_count, status partition);
+   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
+   Partition by `.status` first, then project each issue to `{id, task_type: .metadata.research_step.task_type, title}` (the projection drops `status`, so it must come second).
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
 5. **Overwrite `summary.md`** using this template:
 
@@ -61,13 +60,12 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Ready: <n> — IDs: <list>
-   - Blocked: <n>
+   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
 
    ### Next Steps
-   <from `bd ready --json`: one bullet per ready issue, formatted as
+   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If `bd ready` is empty, write "No ready tasks — graph is blocked or complete.">
+   If there are no open task issues, write "No open tasks — flow complete.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)
@@ -79,4 +77,4 @@ Any reader (human or agent) checks freshness by running `scripts/summary-check.s
 ## Out of scope for this workflow
 
 - Mutating beads. `update-summary` is read-only against `.beads/`.
-- Re-planning. Even if `bd ready` is empty and the graph is incomplete, `update-summary` does not create issues.
+- Re-planning. Even if no open tasks remain and the graph is incomplete, `update-summary` does not create issues.
diff --git a/skills/research-step/SKILL.md b/skills/research-step/SKILL.md
index 3735bb5..49a7fec 100644
--- a/skills/research-step/SKILL.md
+++ b/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Read(templates/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. Each unit of work is a typed sub-issue whose `metadata.research_step.output` matches a JSON schema in `assets/schemas.yaml`.
+Models a research session as a beads epic. A session runs a **flow**, which is one of: the composed `data_and_literature_grounded_theory_generation` flow (which begins with `data_provenance`); either of its sub-flows, `reproduction` or `theorizer`; the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically a separate epic kicked off after a theory-generation run); or a custom chain. Each flow's purpose is stated in its `mission` field in `assets/schemas.yaml`. That file defines the reusable `types`, the `tasks` (typed `input`/`output` plus a common `artifacts`), and the `flows` (each step carrying its `mission` and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -31,41 +31,12 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 |---|---|---|
 | **brainstorm** | Default. Conversational exploration of current state; drafts/refines `mission.md`; hands off to other workflows when the user is ready to act. | `workflows/brainstorm.md` |
 | **init** | Set up the environment: install `bd`/`jq`, run `bd init`, verify `scripts/summary-check.sh`. Hands off to **plan**. | `workflows/init.md` |
-| **plan** | Create or extend the graph. Bootstraps the epic and first tasks from `mission.md`, or adds the next tasks after one closes. | `workflows/plan.md` |
-| **execute** | Run one ready task end-to-end, then hand off to **plan** (which chains to **update-summary**). | `workflows/execute.md` |
+| **plan** | Create or extend the graph. Bootstraps the epic + initial frontier from `mission.md`, or replans downstream tasks after a closed task. | `workflows/plan.md` |
+| **execute** | Run one ready task end-to-end. Hands off to **plan** when the closed task type unlocks new structure; otherwise to **update-summary**. | `workflows/execute.md` |
 | **update-summary** | Regenerate `summary.md` from beads. Idempotent — no-op when `scripts/summary-check.sh` reports `status: fresh`. | `workflows/update-summary.md` |
 
 Task-type schemas live in `assets/schemas.yaml`.
 
-## Plan templates
-
-A template is the plan for a recurring kind of study. Each lives at `templates/<name>.md`: a diagram plus a table of nodes — `id`, `type`, `inputs`, what to do, and any skill to use. `plan` follows the template and adds no wiring of its own. `mission.md` names the template; with none named, use `hypothesis_driven_research`.
-
-- Create one task per node, in dependency order, using the row's text as the description. Don't run ahead of the diagram: at bootstrap create only the first tasks, up to the first "for each"; create the rest as their inputs close.
-- **For each:** a `for each X in <node>` block makes one copy of its tasks per item, once `<node>` closes.
-- **After a for-each:** a task that follows the block waits for every copy, not for the block's source.
-- A node's `inputs` come from its row (or its arrow in the diagram): set the task's inputs from that and block it on each. (`schemas.yaml` is output shape only — no wiring.)
-- Don't add tasks the template doesn't have.
-
-Available templates:
-
-| Name | Purpose |
-|---|---|
-| `data_driven_theory_generation` | See which of an AutoDS run's most surprising findings hold up on independent data, then build theories on the ones that do and test the most promising with new experiments. |
-| `hypothesis_driven_research` | Literature-grounded: survey, raise a hypothesis per gap, test each, synthesize. |
-
-### Task outputs
-
-Task inputs live in the bd issue itself (`bd show <bd-id>` and `metadata.research_step`). Only outputs land on disk, under `.asta/tasks/<bd-id>/`:
-
-| Path | Role |
-|---|---|
-| `.asta/tasks/<bd-id>/output.md` | Human-readable result. **Must link to every file under `artifacts/` it references** using file-relative markdown links (e.g. `[theories](artifacts/theories.json)`, `![figure 1](artifacts/fig1.png)`). |
-| `.asta/tasks/<bd-id>/output.json` | Structured result matching the task type's schema in `assets/schemas.yaml`. Sidecar paths use run-root-relative form (`.asta/tasks/<bd-id>/artifacts/<file>`). |
-| `.asta/tasks/<bd-id>/artifacts/` | Every other file the task produces: sidecar JSON (theory_store, paper_store, novelty_results, extraction_schema, etc.), downloaded data, code, figures, logs, PDF/TEX exports. Templates do not spell out filenames; pick reasonable names inside `artifacts/`. |
-
-Cross-task references in `output.json` use the absolute run-root-relative path; inside `output.md`, use the file-relative link form so the page renders standalone.
-
 ## Routing
 
 ### 1. Honor explicit requests
@@ -80,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → always chain to **plan** (which creates the next tasks or no-ops, then chains to **update-summary**).
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/skills/research-step/assets/schemas.yaml b/skills/research-step/assets/schemas.yaml
index 888db1b..b9643b3 100644
--- a/skills/research-step/assets/schemas.yaml
+++ b/skills/research-step/assets/schemas.yaml
@@ -1,82 +1,436 @@
-# Output shapes for research-step tasks. Each task stores its output at
-# metadata.research_step.output, matching the shape under `output:` for its type.
-# Wiring (which task feeds which) lives in the templates, not here.
-
-schema_version: 1
-
-task_types:
-
-  scope:
-    output:
-      question: string                   # the precise research question
-      boundaries: [string]               # what is in / out of scope
-      success_criteria: [string]         # how we know we have answered it
-
-  definitions:
-    output:
-      terms:
-        - name: string
-          operational_definition: string
-          rationale: string
-
-  literature_review:
-    output:
-      summary_path: string               # relative path; long-form context
-      key_findings: [string]             # 3-10 bullets readable without opening summary_path
-      gaps: [string]                     # gaps that motivate hypotheses
-      citations:
-        - id: string
-          title: string
-          url: string
-          relevance: string
-
-  hypothesis:
-    output:
-      statement: string                  # H_n: ...
-      rationale: string
-      falsifiable_prediction: string
-      expected_evidence: [string]
-
-  experiment_design:
-    output:
-      method: string
-      procedure: [string]                # ordered steps
-      variables:
-        independent: [string]
-        dependent: [string]
-        controls: [string]
-      artifacts_expected: [string]       # paths the gathering step will produce
-
-  evidence_gathering:
-    output:
-      artifacts:
-        - path: string
-          kind: string                   # data | log | figure | code | other
-          description: string
-      log_path: string                   # what was actually run
-      deviations: [string]               # ways execution diverged from design
+version: 1
 
-  auto_discovery:
-    output:
-      runid: string                      # the AutoDS run (created or imported)
-      status: string                     # SUCCEEDED | FAILED | CANCELLED | ...
-      experiments_path: string           # artifacts/experiments_<runid>.json; full node-level export
-      surprising_nodes:
-        - id: string                     # e.g. node_3_0
-          surprise: number
-          finding: string
+enums:
+  outcome:               [held, partial, failed, n/a]
+  testability:           [tested, proxy_only, untestable]
+  construct_equivalence: [equivalent, proxy, mismatch]
+  feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
+  independence_axis:     [region, instrument, method, construct, temporal, population]
+  generation_objective:  [accuracy_focused, novelty_focused]
+  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  novelty:               [established, derivable, genuinely_new]
+  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  priority:              [high, medium, low]
+  access_status:         [acquired, open_unfetched, restricted, not_found]
+  holdout_verdict:       [held, failed, untested]
+
+types:
+
+  artifact:
+    artifactId: string
+    name: string
+    description: string
+    parts: [object]
+    metadata: object
+
+  experiment:
+    experiment_id: string
+    status: string
+    hypothesis: string
+    analysis: string
+
+  empirical_law:
+    id: string
+    statement: string
+    construct: string
+    source_operationalization: string
+    source_node: string
+    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    grouping_rationale: string
+    outcome: outcome
+    testability: testability
+    independence_axes: [independence_axis]
+    effect_size_source: string
+    effect_size_reproduction: string
+    replication_path: string
+
+  dataset:
+    id: string
+    definition: string
+    source: string
+    n: number
+    sampling: string
+    variables: [string]
+    covers_laws: [string]
+
+  data_source:                       # links a run dataset to the paper and repository it came from
+    id: string
+    dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
+    paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
+    paper_title: string
+    paper_url: string
+    data_availability: string        # the paper's data-availability statement, verbatim or summarized
+    repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
+    identifier: string               # DOI / accession / direct URL for the data
+    access_status: access_status     # acquired | open_unfetched | restricted | not_found
+    local_path: string               # repo-root-relative path once acquired (else empty)
+    covers_laws: [string]
+
+  cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
+    id: string
+    research_question: string        # the intent the discovery runs against (from mission.md)
+    inclusion_criteria: string
+    exclusion_criteria: string
+    sampling: string
+    source_data_sources: [string]    # data_source ids the cohort was assembled from
+    discovery_subset: {definition: string, n: number, path: string}   # what discovery sees
+    holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
+    run_id: string                   # the stood-up auto-ds run (autodiscovery create)
+
+  reproduction_design:
+    law_id: string
+    experiment_name: string
+    plain_language_description: string
+    original_operationalization: string
+    independent_operationalization: string
+    construct_equivalence: construct_equivalence
+    feasibility: feasibility
+    required_data: string
+    data_gap: string
 
   analysis:
-    output:
-      verdict: enum [supported, refuted, inconclusive]
-      confidence: number                 # 0.0 - 1.0
-      reasoning: string
-      caveats: [string]
-
-  synthesis:
-    output:
-      answer: string                     # answer to scope.question
-      supporting_hypotheses: [bd_id]
-      refuted_hypotheses: [bd_id]
-      open_questions: [string]           # become discovered-from edges on re-plan
-      report_path: string                # generated markdown report
+    final_answer: string
+    assumptions: [string]
+    figures: [{caption: string, image: string}]
+    code: string
+
+  audit_report:
+    subject_id: string
+    analysis_id: string
+    challenges: [{concern: string, check: string, outcome: string}]
+    artifacts_found: [string]
+    verdict_survives: boolean
+    recommended_adjustment: string
+
+  extracted_data:
+    id: string
+    run_id: string
+    paper_id: string
+    extraction_schema_id: string
+    rows:
+      - name_short: string
+        name_full: string
+        brief_description: string
+        citation_title: string
+        uuid: string
+
+  theory:
+    id: string
+    name: string
+    description: string
+    theory_query: string
+    objective: generation_objective
+    grounds_law_ids: [string]
+    supporting_evidence_ids: [string]
+    components:
+      theory_statements:
+        - statement_name: string
+          theory_statement: string
+          supporting_evidence: [{text: string, uuids: [string]}]
+          conflicting_evidence: [{text: string, uuids: [string]}]
+      new_predictions_likely: [string]
+      new_predictions_unknown: [string]
+      unaccounted_for: [{text: string, uuids: [string]}]
+
+  testability_triage:
+    assessments:
+      - theory_id: string
+        testable_now: boolean
+        available_data: string
+        required_data: string
+        proposed_test: string
+        gap: string
+    testable_theory_ids: [string]
+
+  theory_evaluation:
+    id: string
+    theory_id: string
+    novelty: novelty
+    overall_support_or_contradict: string
+    overall_support_or_contradict_explanation: string
+
+  verification:
+    theory_id: string
+    prediction: string
+    verdict: verification_verdict
+    effect_size: string
+    data_used: string
+    audit_survived: boolean
+    analysis_id: string
+
+  next_run_proposal:
+    kind: next_step_kind
+    title: string
+    tests: [string]
+    data_needed: string
+    expected_signature: string
+    priority: priority
+
+  # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
+  # theory_report, verification_report), one standalone data-gaps report, and a
+  # theory-led master (research_report). Each carries report_path (the .md deliverable
+  # written first), a title, a one-line headline, and a typed body; all but research_report
+  # also carry `links` back to the artifacts, tasks, and papers they rest on. Each sub-flow
+  # report exposes a local `gaps` list that gap_synthesis aggregates into the data_gaps_report.
+
+  provenance_report:
+    report_path: string
+    title: string
+    headline: string
+    sources:
+      - dataset_id: string
+        paper_title: string
+        paper_url: string
+        repository: string
+        access_status: access_status
+        local_path: string
+    acquired: [string]
+    not_acquired: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  reproduction_report:
+    report_path: string
+    title: string
+    headline: string
+    method_note: string
+    laws_ledger:
+      - law_id: string
+        statement: string
+        outcome: outcome
+        testability: testability
+        effect_size_source: string
+        effect_size_reproduction: string
+        independence_axes: [independence_axis]
+        evidence: string
+    what_held: [string]
+    what_failed_or_untestable: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  theory_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theories:
+      - theory_id: string
+        name: string
+        objective: generation_objective
+        one_line: string
+        grounds_law_ids: [string]
+        novelty: novelty
+        testable_now: boolean
+        supporting_evidence_ids: [string]
+    novelty_summary: string
+    new_predictions: [string]
+    open_threads: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  verification_report:
+    report_path: string
+    title: string
+    headline: string
+    novelty_by_verification:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verdict: verification_verdict
+        effect_size: string
+        data_used: string
+        audit_survived: boolean
+    what_was_tested: string
+    what_could_not_be_tested: [string]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  data_gaps_report:
+    report_path: string
+    title: string
+    headline: string
+    gaps:
+      - item: string
+        missing_data: string
+        blocks: string
+        severity: priority
+        arose_in: string
+    next_steps: [next_run_proposal]
+    links: [{label: string, ref: string}]
+
+  research_report:
+    report_path: string
+    title: string
+    headline: string
+    mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]}
+    theory_highlights:
+      - theory_id: string
+        claim: string
+        novelty: novelty
+        verification: verification_verdict
+    inference_chain: [{claim: string, chain: [string]}]
+    what_was_done: [string]
+    sub_reports: [{kind: string, report_path: string, one_line: string}]
+    tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+
+  discovery_report:                  # synthesis output of the auto_discovery flow
+    report_path: string
+    title: string
+    headline: string
+    laws:
+      - law_id: string
+        statement: string
+        surprise: number             # the discovery run's surprise signal for this candidate law
+        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
+        effect_size: string
+    next_steps: [next_run_proposal]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+tasks:
+  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
+  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
+  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
+  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
+  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
+  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
+  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
+  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
+  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
+  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
+  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
+  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
+  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
+  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
+  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
+  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
+  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
+  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
+  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
+  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
+  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
+  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
+  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
+  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
+  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
+  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+
+flows:
+
+  data_and_literature_grounded_theory_generation:
+    mission: Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.
+    data_provenance:
+      mission: Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.
+      chain:
+        - {workflow: data_provenance, mission: Source the papers and datasets the run named in the mission was built on; acquire the open data and record what is restricted.}
+    reproduction:
+      mission: Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.
+      chain:
+        - {workflow: reproduction, mission: Import the run named in the mission; reproduce each law on independent data with construct-equivalence and a feasibility gate.}
+    theorizer:
+      mission: Generate literature- and data-grounded theories of the reproduced laws and score their novelty.
+      chain:
+        - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.}
+    verification:
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      analysis:
+        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      theory_verification:
+        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+        chain: []
+    verification_synthesis:
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      chain: []
+    gap_synthesis:
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      chain: []
+    final_synthesis:
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      chain: []
+
+  data_provenance:
+    mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
+    provenance_search:
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      chain: [asta literature find, asta papers search]
+    provenance_extraction:
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    data_acquisition:
+      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      chain: [asta documents, asta autodiscovery upload]
+    provenance_synthesis:
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      chain: []
+
+  reproduction:
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/n/a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch analysis, not the ingested run.
+    data_driven_discovery:
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      chain: [asta autodiscovery run, asta autodiscovery experiments]
+    law_extraction:
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      chain: []
+    evidence_gathering:
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
+    replication:
+      mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
+      reproduction_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+        chain: [asta experiment]
+      analysis:
+        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduction_audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      reproduce:
+        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+        chain: []
+    reproduction_synthesis:
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      chain: []
+
+  theorizer:
+    mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
+    evidence_extraction:
+      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    theory_generation:
+      mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
+      theory_formation:
+        mission: Form theories from the shared extraction store under this branch's objective.
+        chain: [asta generate-theories form-theory]
+    testability_triage:
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      chain: []
+    novelty_assessment:
+      mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      chain: [asta generate-theories evaluate-novelty]
+    theory_synthesis:
+      mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      chain: []
+
+  auto_discovery:
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    cohort_assembly:
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
+    discovery_run:
+      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      chain: [asta autodiscovery submit, asta autodiscovery experiments]
+    replication:
+      mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      holdout_replication:
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        chain: [asta analyze-data submit, asta analyze-data poll]
+    discovery_synthesis:
+      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      chain: []
diff --git a/skills/research-step/scripts/close-task.sh b/skills/research-step/scripts/close-task.sh
new file mode 100755
index 0000000..673b23f
--- /dev/null
+++ b/skills/research-step/scripts/close-task.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# close-task.sh <issue-id> <output-json> <output-markdown>
+# Publish a task's output and finish it: write output_json + output_markdown into the issue
+# metadata, validate output_json against the schema, close the issue, assert it closed, then close
+# any ancestor group whose last child just closed. Exit: 0 ok · 1 usage/bad input · 2 did not close.
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+[[ $# -eq 3 ]] || { echo "usage: close-task.sh <issue-id> <output-json> <output-markdown>" >&2; exit 1; }
+id="$1"; oj="$2"; om="$3"
+[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; }
+[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; }
+jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; }
+
+# 1. publish: merge output_json + output_markdown into the existing research_step metadata (temp file removed on exit)
+cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
+merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
+  '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT; printf '%s' "$merged" > "$tmp"
+bd update "$id" --metadata @"$tmp" >/dev/null
+
+# 2. validate structurally (reads the issue back; no style lint); on failure set -e exits here with the validator's code
+bash "$here/validate-output.sh" "$id"
+
+# 3. close and 4. assert closure
+bd close "$id" >/dev/null
+[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \
+  || { echo "close-task: $id did not close" >&2; exit 2; }
+echo "closed $id"
+
+# 5. cascade: walk up the dotted id; close each ancestor group whose direct children are all closed
+cur_id="$id"
+while [[ "$cur_id" == *.* ]]; do
+  parent="${cur_id%.*}"
+  bd show "$parent" --json >/dev/null 2>&1 || break
+  open_kids="$(bd list --json | jq --arg p "$parent" '
+    [ .[]
+      | select(.id | startswith($p + "."))
+      | select((.id[($p|length)+1:] | contains(".")) | not)
+      | select(.status != "closed") ] | length')"
+  [[ "$open_kids" -eq 0 ]] || break
+  bd close "$parent" >/dev/null 2>&1 && echo "closed group $parent"
+  cur_id="$parent"
+done
diff --git a/skills/research-step/scripts/create-task.sh b/skills/research-step/scripts/create-task.sh
new file mode 100755
index 0000000..6024cf6
--- /dev/null
+++ b/skills/research-step/scripts/create-task.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# create-task.sh <parent-id> <task_type> <flow> <title> <brief-description> [input-id ...]
+# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description,
+# and initialized research_step metadata (output_json / output_markdown stay null until execute
+# publishes them via close-task.sh). Prints the new issue id. Exit: 1 usage · 3 bad task_type · 4 bad description.
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
+parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
+# After the shift "$@" holds only input-ids, so report $task_type (not $2). Any non-zero exit of the check lands here.
+python3 - "$schemas" "$task_type" <<'PY' || { echo "create-task: unknown task_type '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+import yaml, sys
+d = yaml.safe_load(open(sys.argv[1]))
+sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
+PY
+
+[[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
+[[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
+[[ "${#desc}" -le 200 ]]    || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; }
+
+if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
+meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT; printf '%s' "$meta" > "$tmp"
+bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/skills/research-step/scripts/validate-output.sh b/skills/research-step/scripts/validate-output.sh
index ab46d65..af3b8f6 100755
--- a/skills/research-step/scripts/validate-output.sh
+++ b/skills/research-step/scripts/validate-output.sh
@@ -1,166 +1,43 @@
 #!/usr/bin/env bash
-# validate-output.sh — structural validation of a research_step output JSON.
-#
-# Usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]
-#
-# Verifies that the JSON file:
-#   1. parses
-#   2. carries the metadata envelope
-#      ({research_step: {task_type, inputs, output_schema_version, output}})
-#   3. has every required `output.<key>` for the given <task_type> per
-#      assets/schemas.yaml (schema_version: 1)
-# If [task-dir] (e.g. .asta/tasks/<id>) is given, also runs document-quality
-# checks on its output.md.
-#
-# Exit codes:
-#   0  — valid
-#   2  — JSON parse error
-#   3  — unknown task_type
-#   4  — missing required field
-#   5  — task_type mismatch with envelope
-#   6  — required output.md missing (only when [task-dir] supplied)
-#   7  — output.md empty or a stub (only when [task-dir] supplied)
-#   8  — output.md has no markdown links (only when [task-dir] supplied)
-#   9  — a named entity is unlinked (only when [task-dir] supplied)
-#   10-15 — report node only (when artifacts/report.tex exists): report.pdf missing (10),
-#           no title-page workflow diagram (11), no TOC (12), <8 sections (13),
-#           <3 embedded figures (14), a required section is missing (15)
-#
-# Structural checks only — required fields, working links, and the report's basic pieces.
+# validate-output.sh <issue-id> — structural check of a task's stored output_json.
+# Reads the issue from beads, compiles assets/schemas.yaml, and checks that
+# metadata.research_step.output_json holds exactly tasks.<task_type>.output (incl. artifacts).
+# No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
 set -euo pipefail
-
-if [[ $# -lt 2 || $# -gt 3 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file> [task-dir]" >&2
-  exit 1
-fi
-
-task_type="$1"
-file="$2"
-task_dir="${3:-}"
-
-if ! jq -e . "$file" > /dev/null 2>&1; then
-  echo "validate-output: $file is not valid JSON" >&2
-  exit 2
-fi
-
-# Required output fields, mirroring assets/schemas.yaml (schema_version: 1).
-case "$task_type" in
-  scope)              required="question boundaries success_criteria" ;;
-  definitions)        required="terms" ;;
-  literature_review)  required="summary_path key_findings gaps citations" ;;
-  hypothesis)         required="statement rationale falsifiable_prediction expected_evidence" ;;
-  experiment_design)  required="method procedure variables artifacts_expected" ;;
-  evidence_gathering) required="artifacts log_path deviations" ;;
-  auto_discovery)     required="runid status experiments_path surprising_nodes" ;;
-  analysis)           required="verdict confidence reasoning caveats" ;;
-  synthesis)          required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
-  *)
-    echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis" >&2
-    exit 3
-    ;;
-esac
-
-# The envelope must carry the matching task_type so we don't validate scope JSON
-# against an analysis schema by accident.
-envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
-if [[ -z "$envelope_type" ]]; then
-  echo "validate-output: $file missing .research_step.task_type" >&2
-  exit 5
-fi
-if [[ "$envelope_type" != "$task_type" ]]; then
-  echo "validate-output: envelope task_type='$envelope_type' but expected '$task_type'" >&2
-  exit 5
-fi
-
-# Envelope shape.
-for key in inputs output_schema_version output; do
-  if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: $file missing .research_step.$key" >&2
-    exit 5
-  fi
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
+id="$1"
+# '|| true': a failed bd show must fall through to the exit-2 message below, not abort silently via pipefail + set -e.
+rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // empty' || true)"
+[[ -n "$rs" ]] || { echo "validate-output: $id has no metadata.research_step" >&2; exit 2; }
+task_type="$(jq -r '.task_type // empty' <<<"$rs")"
+[[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
+
+expected="$(python3 - "$schemas" "$task_type" <<'PY'
+import yaml, sys
+d = yaml.safe_load(open(sys.argv[1]))
+t = d["tasks"].get(sys.argv[2])
+if t is None: sys.exit(3)
+print(" ".join(t["output"]))
+PY
+)" || { echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+
+got="$(jq -c '.output_json // empty' <<<"$rs")"
+[[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
+
+for k in $expected; do
+  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
+    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
 done
-
-# Required output fields.
-for key in $required; do
-  if ! jq -e ".research_step.output | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: missing required field 'output.$key' for task_type '$task_type'" >&2
-    exit 4
-  fi
-done
-
-# Type spot-checks for the high-leverage cases. Not exhaustive — just the
-# fields where a wrong type at this layer would silently break update-summary rendering
-# or downstream tasks.
-case "$task_type" in
-  literature_review)
-    jq -e '.research_step.output.key_findings | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.key_findings must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.gaps | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.gaps must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.citations | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.citations must be an array" >&2; exit 4; }
-    ;;
-  analysis)
-    jq -e '.research_step.output.verdict | IN("supported", "refuted", "inconclusive")' "$file" >/dev/null \
-      || { echo "validate-output: output.verdict must be one of supported|refuted|inconclusive" >&2; exit 4; }
-    jq -e '.research_step.output.confidence | type == "number" and . >= 0 and . <= 1' "$file" >/dev/null \
-      || { echo "validate-output: output.confidence must be a number in [0, 1]" >&2; exit 4; }
-    ;;
-esac
-
-# output.md document-quality gate. Every task must produce a human-readable
-# output.md (skill "Task outputs" table) that links the entities it names.
-if [[ -n "$task_dir" ]]; then
-  md="$task_dir/output.md"
-  if [[ ! -f "$md" ]]; then
-    echo "validate-output: required output.md not found at '$md'" >&2
-    exit 6
-  fi
-  if [[ "$(grep -cve '^[[:space:]]*$' "$md" || true)" -lt 3 ]]; then
-    echo "validate-output: output.md is empty or a stub (<3 non-blank lines)" >&2
-    exit 7
-  fi
-  if ! grep -qE '\[[^]]+\]\([^)]+\)' "$md"; then
-    echo "validate-output: output.md has no markdown links" >&2
-    exit 8
-  fi
-  # Strip links, then flag any named entity still bare in output.md / report.tex.
-  unlinked=$(for f in "$md" "$task_dir/artifacts/report.tex" "$task_dir/report.tex"; do
-    [[ -f "$f" ]] && perl -ne '
-      if (/^\s*```/) { $fence = !$fence; next } next if $fence;
-      s/!?\[[^\]]*\]\([^)]*\)//g; s/\\(?:href|ref|autoref|includegraphics|label|cite[a-z]*)(?:\[[^\]]*\])?\{[^}]*\}(\{[^}]*\})?//g;
-      while (/(node_\d+_\d+|\bL\d+\b|theory-\d+-\d+|\([A-Z][a-z]+(?: et al\.?)?,? \d{4}\)|[\w.\/-]+\.(?:csv|jsonl|json|png|tex|pdf|parquet|xlsx))/g) { print "$ARGV:$.: $1\n" }
-    ' "$f"
-  done) || true
-  if [[ -n "$unlinked" ]]; then
-    echo "$unlinked" >&2
-    echo "validate-output: named entities above are unlinked" >&2
-    exit 9
-  fi
-
-  # The report's basics. Only the report node makes report.tex; when it exists,
-  # check it has what report_example.tex has. Each failure points back to it.
-  rpt="$task_dir/artifacts/report.tex"
-  if [[ -f "$rpt" ]]; then
-    ref="templates/examples/report_example.tex"
-    rfail() {
-      echo "report-gate: $1" >&2
-      echo "  -> this is the minimum, not the goal. Re-read $ref in full and match" >&2
-      echo "     its depth and citation density before retrying." >&2
-      exit "$2"
-    }
-    [[ -f "$task_dir/artifacts/report.pdf" ]] || rfail "report.pdf missing — compile report.tex" 10
-    grep -q '\\begin{tikzpicture}\|\\includegraphics' \
-      <(sed -n '/begin{titlepage}/,/end{titlepage}/p' "$rpt") \
-      || rfail "no title-page workflow diagram (see the TikZ flowchart in $ref)" 11
-    grep -q '\\tableofcontents' "$rpt"                  || rfail "no \\tableofcontents" 12
-    [[ "$(grep -c '\\section{' "$rpt")" -ge 8 ]]        || rfail "<8 sections — likely a skimmed, thin report" 13
-    [[ "$(grep -c '\\includegraphics' "$rpt")" -ge 3 ]] || rfail "<3 embedded run figures" 14
-    for s in Mission Abstract Methods Results Conclusion Catalogue Datasets References; do
-      grep -qi "section{[^}]*$s" "$rpt" || rfail "missing section '$s' (present in $ref)" 15
-    done
-  fi
-fi
+while IFS= read -r k; do  # second pass: every key present in output_json must be one the schema lists
+  case " $expected " in *" $k "*) ;; *)  # space-padded so only whole key names match
+    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
+  esac
+done < <(jq -r 'keys[]' <<<"$got")  # process substitution keeps the loop in this shell, so exit 4 ends the script
+jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
+  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }
 
 echo "ok"
diff --git a/skills/research-step/scripts/write-meta.sh b/skills/research-step/scripts/write-meta.sh
deleted file mode 100755
index 6e7d71a..0000000
--- a/skills/research-step/scripts/write-meta.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-# write-meta.sh — materialize a metadata JSON blob to a temp file and print
-# its path, suitable for `bd update <id> --metadata @<path>` or
-# `bd create ... --metadata=@<path>`.
-#
-# Reads JSON from stdin (or from $1 if a path is given), validates that it
-# parses, and writes it under $TMPDIR with mode 0600. The path is printed on
-# stdout so the caller can splice it into a bd command.
-#
-# Why this exists: `bd update --metadata` accepts either a JSON string or
-# `@file.json`. Inlining a JSON string requires `"$(cat /tmp/x.json)"` (a
-# non-bd shell op the SKILL.md frontmatter does not permit), and shell quoting
-# gets fragile with embedded quotes. Materializing a file once and using
-# `@path` keeps everything in `Bash(bd:*)` territory.
-set -euo pipefail
-
-tmp=$(mktemp -t research-step-meta.XXXXXX.json)
-trap 'rm -f "$tmp"' ERR
-
-if [[ $# -ge 1 ]]; then
-  cp "$1" "$tmp"
-else
-  cat > "$tmp"
-fi
-
-if ! jq -e . "$tmp" >/dev/null 2>&1; then
-  echo "write-meta: input is not valid JSON" >&2
-  rm -f "$tmp"
-  exit 2
-fi
-
-chmod 0600 "$tmp"
-echo "$tmp"
diff --git a/skills/research-step/templates/data_driven_theory_generation.md b/skills/research-step/templates/data_driven_theory_generation.md
deleted file mode 100644
index 15a5875..0000000
--- a/skills/research-step/templates/data_driven_theory_generation.md
+++ /dev/null
@@ -1,118 +0,0 @@
----
-name: data_driven_theory_generation
-description: |
-  See which of an AutoDS run's most surprising findings hold up on independent
-  data, then build theories on the ones that do and test the most promising with new experiments.
----
-
-# Data-driven theory generation
-
-Take an AutoDS run's most surprising findings, test whether each holds up on data the run didn't use, then build theories on what survives and run follow-up experiments.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  data_provenance["Data provenance"]
-  definitions --> data_provenance
-  auto_discovery["AutoDS run (+ top 10 surprising findings)"]
-  data_provenance --> auto_discovery
-  subgraph sub1["for each of the 10 surprising findings"]
-    direction TB
-    hypothesis["Restate finding"]
-    literature_review["Literature search"]
-    experiment_design["Pre-register test"]
-    evidence_gathering["Find independent data"]
-    analysis["Replicate"]
-    hypothesis --> literature_review --> experiment_design --> evidence_gathering --> analysis
-    analysis -- "retry: inconclusive → re-spec" --> experiment_design
-    analysis -- "retry: bad data → re-locate" --> evidence_gathering
-  end
-  auto_discovery --> hypothesis
-  replication_synthesis["Replication summary (k of 10, by mechanism)"]
-  analysis --> replication_synthesis
-  theorizer_theories["Theorizer-grounded theories"]
-  replication_synthesis --> theorizer_theories
-  novelty["Score theories for novelty"]
-  theorizer_theories --> novelty
-  subgraph sub2["for each of the top 3 theories"]
-    direction TB
-    followon_exp_design["Pre-register experiment (AED)"]
-    followon_evidence["Find new data"]
-    followon_analysis["Run, or leave as a proposal"]
-    followon_exp_design --> followon_evidence --> followon_analysis
-  end
-  novelty --> followon_exp_design
-  report["Closing report"]
-  followon_analysis --> report
-  report --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | Anchor the question on the AutoDS run named in `mission.md`. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against the data. | — |
-| `data_provenance` | `evidence_gathering` | `definitions` | Load the `asta://` documents and dataset URIs from `mission.md` and index any local PDFs. Record which datasets the AutoDS run itself used — later steps need that to judge what counts as independent. | `asta-preview:local-paper-index` |
-| `auto_discovery` | `auto_discovery` | `scope, data_provenance` | Import the `run_pointer:` run, or create one against the `datasets[]`. Export the full results to `artifacts/experiments_<runid>.json`, and list the 10 highest-surprise nodes — the findings to replicate. | `asta-preview:autodiscovery` |
-| `hypothesis` | `hypothesis` | `auto_discovery` | For each of the 10: restate the node's finding as one claim to replicate, citing the node. | — |
-| `literature_review` | `literature_review` | `hypothesis, data_provenance` | Search the literature for this finding with `asta-preview:find-literature` — start from the `data_provenance` documents, then go to PaperFinder. As you read, pull out the **datasets those papers used and where to get them** (repository, data DOI, availability statement) — these are the leads `evidence_gathering` fetches. The job isn't just context; it's to find real, independent data to re-test the finding. | `asta-preview:find-literature`, `asta-preview:asta-documents` |
-| `experiment_design` | `experiment_design` | `hypothesis, literature_review` | Pre-register the replication test before any results: state the pass/fail rule — same sign and significant, or effect inside the original confidence interval. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design, data_provenance` | Go get an external dataset to re-test the finding: follow `literature_review`'s leads to the public sources those papers used (repositories, data DOIs, availability statements) and **download** the most relevant one. This is the expected path — a test on the run's own inputs isn't independent, so don't settle for it. Log every attempt (found / downloaded / blocked) in `artifacts/acquisition_ledger.json`. Only once a documented search turns up nothing usable may you fall back to the run's own sources, marked the weakest tier. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Replicate in DataVoyager (`asta analyze-data submit`) against the pre-registered rule. The verdict must come from a run on real data, not the AutoDS export or your own reasoning. Record the tier: replicated on independent data / consistent within the run's own data (fallback) / not testable. No data, no close — leave it blocked. | `asta-preview:analyze-data` |
-| `replication_synthesis` | `synthesis` | `analysis` (all 10) | Report how many of the 10 replicated, which failed, and which couldn't be tested — each with its tier. Group the findings into mechanisms for the report and the theorizer. | — |
-| `theorizer_theories` | `hypothesis` | `scope, replication_synthesis` | Run the theorizer once (the question plus a statement of which findings replicated; see [example](examples/theorizer_mission_example.md)). No `paper_store`; set `max_papers_to_retrieve: 100`. Keep only theories anchored to at least one replicated finding. Map theory→`statement`, anchoring findings→`rationale`, prediction→`falsifiable_prediction`. | `asta-preview:generate-theories` |
-| `novelty` | `hypothesis` | `theorizer_theories` | Score the theories for novelty and re-emit them ranked. The follow-on tests the top 3 by novelty × feasibility. | `asta-preview:generate-theories` |
-| `followon_exp_design` | `experiment_design` | `novelty` | For each of the top 3 theories: pre-register an experiment for it with the AutoExperimentDesigner (`asta auto-exp-designer design-experiment`), using the 5 most related papers. Not `asta-preview:experiment` — that runs Panda, a different system. | `asta auto-exp-designer` |
-| `followon_evidence` | `evidence_gathering` | `followon_exp_design` | Go get genuinely new data for the experiment — fetch it from the public sources the related papers used, not a re-slice of the replication data. Log attempts in the ledger. If nothing usable exists, the pre-registered design is the deliverable — a proposal for future data. | — |
-| `followon_analysis` | `analysis` | `followon_exp_design, followon_evidence` | If the new data exists, run the experiment in DataVoyager to a verdict and save figures, tables, and logs to `artifacts/`. If it doesn't, close it as untested — `inconclusive`, with a caveat that it's a pre-registered proposal, linking the design — rather than forcing a run or blocking the report. Retry (only when a run failed to actually test the theory) per the table below. | `asta-preview:analyze-data` |
-| `report` | `synthesis` | `replication_synthesis, followon_analysis` (all 3) | Write `artifacts/report.tex` → PDF and a short `output.md`. Report the replication results and all three follow-on outcomes — tested (held or failed) or proposed (untested, no data). Read [`report_example.tex`](examples/report_example.tex) in full first and match its depth and citation density. Embed every figure. `validate-output.sh` checks the report has the basics before it closes. | — |
-
-The 10 finding-restatement `hypothesis` tasks are filled and closed at creation — see plan.md. (`theorizer_theories` and `novelty` are `hypothesis`-typed too, but they run a skill, so they execute like any other task.)
-
-## Running DataVoyager
-
-Both the per-finding `analysis` and `followon_analysis` run in DataVoyager — at most 3 at a time, attaching every dataset up front. A replication needs data the run didn't use, so go find and download it — the literature is your map to what's public. Combining the run's own sources is the weakest tier, allowed only after the acquisition ledger shows a real external search came up empty; "stayed local" is not a replication. Only call data `data_unavailable` once the ledger shows the trace failed — a 403/404 on someone else's resource isn't proof — then leave that `analysis` blocked, not closed.
-
-A clean result against the pre-registered rule — replicated or not — is the verdict, not a reason to retry. Retry only when the run didn't actually test the claim:
-
-| What DataVoyager did | Go back to | Fix |
-|---|---|---|
-| Couldn't load or join the data (`KeyError`, missing columns, mismatched keys, duplicate rows) | `evidence_gathering` (≤3) | Re-locate or pre-process. If a multi-file join keeps failing, pre-join into one or two master panels in a documented script and resubmit, recording the join rules in `provenance.json`. |
-| Ran but was underpowered or inconclusive on its own terms | `experiment_design` (≤3) | Reconsider power or controls — but do not move the pre-registered bar to manufacture a pass. |
-| Infra failure (kernel error, timeout, transcription glitch) | `analysis` (≤3) | Resubmit as-is. If it recurs, switch to the pre-joined master panels above. |
-
-## mission.md
-
-- `run_pointer:` — the AutoDS run to import (omit to create one).
-- `datasets[]` — input dataset URIs for a new run.
-- A focus statement in the body — the question under study.
-
-Unless the user explicitly says to use local inputs only, fetch external public data for replication.
-
-## Writing the report and outputs
-
-These apply to every `output.md` and the final report — documents a domain expert will read, not work logs. `validate-output.sh` checks links and the report's structure automatically; the rest is on you.
-
-- **Tone.** Neutral, for an expert in the field. No exclamations, no filler, no "we will now…".
-- **Cite specifics.** Every non-trivial claim points to a paper, dataset, or experiment; effect sizes, p-values, and thresholds always cite the experiment that produced them. Number the computational experiments `E1, E2, …` and list each (finding → test → result → verdict) in an appendix.
-- **Link what you name.** Every finding, paper, theory, dataset, run, and experiment is a real link, never bare text or `node_3_0`:
-
-  | thing | link to |
-  |---|---|
-  | AutoDS node (`node_3_0`) | `artifacts/experiments_<runid>.json`, at the node id |
-  | paper | the asta document, paper URL, or `data_provenance` entry |
-  | theory | `artifacts/theorizer_result.json`, or the task that produced it |
-  | DataVoyager run | `artifacts/dv_result*.json`, or the task that exported it |
-  | dataset | the file under `inputs/`, or the Datasets appendix |
-  | experiment E-number | its appendix entry |
-
-- **Show figures.** Every figure an `analysis` produces is embedded in `output.md` and `\includegraphics`'d in the report, so the page stands alone.
-- **Write about the science, not the workflow.** No task ids, "epic", or node names in the prose.
-- **Be honest about what held up.** Report the replication rate and the tiers plainly — a finding that didn't replicate, or couldn't be tested on independent data, is a result, not a gap to paper over. Don't invent experiments beyond what was designed.
diff --git a/skills/research-step/templates/examples/report_example.tex b/skills/research-step/templates/examples/report_example.tex
deleted file mode 100644
index e87ebf5..0000000
--- a/skills/research-step/templates/examples/report_example.tex
+++ /dev/null
@@ -1,620 +0,0 @@
-% Worked example of the `report` node output, from the polio_final_v2 grounded-theory-generation run.
-%
-% This file is a reference for structure, tone, citation density, hyperlink discipline, appendix
-% structure, and figure / table macros. Model your `report.tex` on it; don't copy it verbatim.
-%
-% The `\includegraphics{report_example_figures/*.png}` calls below show how a report embeds its
-% figures. Those PNGs are illustrative and not bundled, so this reference does not compile as-is;
-% your run embeds its own figures from `artifacts/`.
-
-\documentclass[11pt]{article}
-\usepackage[margin=1in]{geometry}
-\usepackage{amsmath}
-\usepackage{amssymb}
-\usepackage{hyperref}
-\usepackage{booktabs}
-\usepackage{longtable}
-\usepackage{array}
-\usepackage{enumitem}
-\usepackage{xcolor}
-\usepackage{microtype}
-\usepackage{graphicx}
-\usepackage{titling}
-\usepackage{fancyhdr}
-\usepackage{titlesec}
-\usepackage{tabularx}
-\usepackage{tikz}
-\usetikzlibrary{shapes.geometric, arrows.meta, positioning, calc, fit, backgrounds}
-\definecolor{paperfinderpurple}{HTML}{6D28D9}
-
-\hypersetup{
-  colorlinks=true,
-  linkcolor=blue!55!black,
-  urlcolor=blue!55!black,
-  citecolor=blue!55!black,
-}
-
-\pagestyle{fancy}
-\fancyhf{}
-\fancyhead[L]{Multi-Agent Computational Investigation}
-\fancyhead[R]{Pakistan WPV1 Resurgence, 2022--2024}
-\fancyfoot[C]{\thepage}
-\renewcommand{\headrulewidth}{0.4pt}
-
-\titleformat{\section}{\bfseries\Large\color{blue!50!black}}{\thesection}{0.6em}{}
-\titleformat{\subsection}{\bfseries\large}{\thesubsection}{0.5em}{}
-
-\setlength{\parskip}{0.5em}
-
-\begin{document}
-
-\begin{titlepage}
-\thispagestyle{empty}
-\vspace*{0.6in}
-\begin{center}
-{\Large\bfseries\color{blue!50!black} The Role of Older Populations in Pakistan's 2022--2024 Wild Poliovirus Type 1 Resurgence}\\[2.5em]
-\end{center}
-
-\vspace*{40pt}
-
-\noindent\makebox[\textwidth][c]{%
-\begin{tikzpicture}[
-  font=\footnotesize,
-  procbox/.style={
-    rectangle, rounded corners=2pt, draw=gray!55, fill=gray!8, line width=0.5pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  agentbox/.style={
-    rectangle, rounded corners=2pt, draw=blue!55!black, fill=blue!10, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  paperbox/.style={
-    rectangle, rounded corners=2pt, draw=paperfinderpurple, fill=paperfinderpurple!12, line width=0.7pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  finalbox/.style={
-    rectangle, rounded corners=2pt, draw=green!50!black, fill=green!12, line width=0.8pt,
-    text width=2.3cm, minimum height=1.05cm, align=center, inner sep=3pt
-  },
-  arr/.style={-{Stealth[length=2.2mm]}, gray!70, line width=0.55pt},
-  loopbg/.style={draw=gray!55, dashed, rounded corners=4pt, inner sep=8pt},
-  node distance=0.45cm and 0.45cm,
-]
-% Band 1: discovery phase, left-to-right
-\node[procbox] (scope) {Scope \&\\Definitions};
-\node[procbox, right=of scope] (prov) {Data\\Provenance};
-\node[agentbox, right=of prov] (ad) {\textbf{Auto-}\\\textbf{Discovery}\\{\scriptsize 4 runs / 121 exps}};
-\node[procbox, right=of ad] (laws) {Curate Laws\\L1--L6};
-\node[procbox, right=of laws] (themes) {Cluster\\Themes};
-
-% Band 2: foreach theme, visually right-to-left so the S-curve flows cleanly
-\node[paperbox, below=1.05cm of themes] (lit) {\textbf{Paper-}\\\textbf{Finder}\\{\scriptsize lit review}};
-\node[procbox, left=of lit] (hyp) {Theme\\Hypotheses};
-\node[procbox, left=of hyp] (evid) {Theme\\Evidence};
-\node[procbox, left=of evid] (exp) {Theme Exp\\Design};
-\node[agentbox, left=of exp] (rep) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize reproduce}\\{\scriptsize E1--E14}};
-
-\begin{scope}[on background layer]
-\node[loopbg, fit=(rep)(exp)(evid)(hyp)(lit)] (loopbox) {};
-\end{scope}
-\node[font=\scriptsize\itshape, text=black, anchor=south east] at ($(loopbox.north east)+(-2pt,1pt)$) {foreach theme \& hypothesis};
-
-% Band 3: synthesis + follow-on, left-to-right
-\node[procbox, below=1.05cm of rep] (across) {Across-themes\\Synthesis};
-\node[agentbox, right=of across] (theo) {\textbf{Theorizer}\\{\scriptsize 2 passes,}\\{\scriptsize 13 theories}};
-\node[procbox, right=of theo] (nov) {Novelty\\Scoring};
-\node[agentbox, right=of nov] (aed) {\textbf{Auto-Exp-}\\\textbf{Designer}\\{\scriptsize pre-reg}};
-\node[agentbox, right=of aed] (dv2) {\textbf{Data-}\\\textbf{Voyager}\\{\scriptsize confirm}\\{\scriptsize E15--E16}};
-
-\node[finalbox, below=0.8cm of dv2] (rep_final) {\textbf{This Report}};
-
-% Band 1 arrows
-\draw[arr] (scope) -- (prov);
-\draw[arr] (prov) -- (ad);
-\draw[arr] (ad) -- (laws);
-\draw[arr] (laws) -- (themes);
-
-% Band 1 -> Band 2: straight down themes -> lit
-\draw[arr] (themes) -- (lit);
-
-% Band 2 arrows: lit -> hyp -> evid -> exp -> rep (visually right-to-left)
-\draw[arr] (lit) -- (hyp);
-\draw[arr] (hyp) -- (evid);
-\draw[arr] (evid) -- (exp);
-\draw[arr] (exp) -- (rep);
-
-% Retry self-loop on rep (black so it reads clearly)
-\draw[{Stealth[length=2.2mm]}-, dashed, draw=black, line width=0.55pt] (rep.north west) .. controls +(-0.45,0.35) and +(-0.45,-0.35) .. node[left, font=\scriptsize\itshape, text=black, xshift=-1pt]{retry $\leq 3$} (rep.south west);
-
-% Band 2 -> Band 3: straight down rep -> across
-\draw[arr] (rep) -- (across);
-
-% Band 3 arrows
-\draw[arr] (across) -- (theo);
-\draw[arr] (theo) -- (nov);
-\draw[arr] (nov) -- (aed);
-\draw[arr] (aed) -- (dv2);
-
-% Band 3 -> final report
-\draw[arr] (dv2) -- (rep_final);
-
-% Manually set the bounding box so the diagram (not the retry-loop overhang) is what gets centered.
-\path[use as bounding box] ([xshift=-4pt]scope.west |- ad.north) rectangle ([xshift=4pt]dv2.east |- rep_final.south);
-\end{tikzpicture}%
-}
-
-\vspace*{\fill}
-\begin{center}
-\footnotesize\itshape\color{gray!50!black}
-Blue and purple nodes invoke Asta agents (AutoDiscovery, PaperFinder, DataVoyager, Theorizer, AutoExperimentDesigner).\ \ Gray nodes are human-authored synthesis steps.\ \ The dashed box is a per-theme inner loop.
-\end{center}
-\end{titlepage}
-
-\tableofcontents
-\newpage
-
-%---------------------------------------------------------------
-\section{Mission}
-
-This investigation set out to answer a single focal question: \emph{What is the role of populations aged five years or older in Pakistan's persistent and resurgent wild poliovirus type 1 (WPV1) transmission between 2022 and 2024?} The mission is anchored on the empirical observation that national third-dose oral polio vaccine (Pol3) coverage in Pakistan was stable to rising across the 2021$\to$2024 resurgence window, yet annual case counts rebounded from 1 (2021) to 20 (2022), 6 (2023), and 74 (2024). The standard under-five surveillance focus of polio programs in Pakistan cannot, on its own, explain this trajectory.
-
-We approached the question with a multi-agent computational pipeline. Four prior AutoDiscovery (AD) runs over Pakistan polio surveillance, demographic, and immunization-coverage data had surfaced six cross-cutting candidate ``laws'' explaining facets of the resurgence. We replicated each law using DataVoyager (DV), an agent-driven statistical analysis system, then performed seven additional cross-source robustness experiments to test the laws against independent data or novel reformulations. Two passes of the Theorizer agent --- one accuracy-focused, one novelty-focused --- generated 13 candidate theories grounded in the AD laws and a 100-paper PaperFinder corpus. The AutoExperimentDesigner (AED) then produced pre-registered protocols for the two most promising theories, which DV executed.
-
-The mission framing accepted from the outset that ``older populations'' includes the entire $\geq$5 year cohort treated as a single group, set against the under-five vaccination target. The cohort definition was not narrowed further during the investigation. The mission explicitly required the investigation to follow the data where it led rather than to confirm any prior hypothesis about the relative contribution of adolescents, adults, or the elderly.
-
-%---------------------------------------------------------------
-\section{Abstract}
-
-We tested whether the 2022--2024 Pakistan WPV1 resurgence is consistent with the older ($\geq$5y) population functioning as a transmission reservoir, a mobility vector, or neither. The investigation comprised 15 computational experiments (E1--E15) replicating six AutoDiscovery laws and seven cross-source robustness checks, two Theorizer runs producing 13 candidate theories, and a single pre-registered follow-on test designed by AutoExperimentDesigner and executed by DataVoyager.
-
-\paragraph{Headline finding.} The combined theory ``national Pol3-WPV1 elasticity collapses above an $\approx$80\% Pol3 threshold, after which cross-border mobility-driven force of infection (FOI) becomes the dominant predictor of WPV1 transmission and detection'' is statistically supported across all three pre-registered components:
-\begin{itemize}[noitemsep]
-\item \textbf{National regime shift:} Bai-Perron break detected at 2018; threshold regression $\gamma{=}80.5\%$ (95\% bootstrap CI 79.0--82.0). The first year national Pol3 crossed 80\% was 2018.
-\item \textbf{Mobility dominance post-threshold:} District-year Poisson IRR for Afghanistan-border $\times$ post-2021 $=$ 2.11 (95\% CI 1.28--3.46), $p<0.01$. Standardized inequality $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$, exceeding the pre-registered 1.5 threshold. Post-threshold standardized Pol3 effect is $-0.18$, 95\% CI $[-0.44, +0.03]$ (includes zero), while pre-threshold was $-0.39$.
-\item \textbf{Operational signature:} An environmental surveillance ``Sabin-low / WPV1-high'' signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a PPV ratio of 2.44 and an AUC difference of 0.23 (95\% bootstrap CI 0.09--0.35).
-\end{itemize}
-
-\paragraph{Reconciliation of the focal question.} Older populations are \emph{not} dominant transmission reservoirs in Pakistan. Three independent experiments refuted the ``adult reservoir'' framing: at the district level, under-five population share dominates 15--64 share in predicting WPV1 incidence; at the province level the 15--64 share is null for both case and environmental-surveillance outcomes; and in districts that experienced both WPV1 and cVDPV2 cases during 2019--2021, cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts (OLS coefficient on 15--64 share = $-8.01$, 95\% CI $[-12.5, -3.5]$, $p<0.001$). The role of older populations is instead as \emph{mobility vectors}. Annual Pakistan and Afghanistan WPV1 case counts are coupled, with the coupling strengthening post-2021. Border-adjacent districts show no incremental risk in the pooled 2019--2024 window but activate as a transmission predictor in the post-2021 sub-window (interaction $p{=}0.079$). Resident Afghan refugee population (UNHCR December 2020 baseline) does not predict 2022--2024 WPV1 cases, indicating the operative channel is recent cross-border flow rather than settled stock.
-
-\paragraph{Secondary findings.} Two-regime household contact intensity is supported: large average household size in low-density districts and stagnant population growth in high-density districts both predict WPV1 case counts (interaction $p{=}0.0006$ and $p{=}0.05$). The BCG-minus-Pol3 dropout signal, originally surfaced as an AutoDiscovery law of program-quality friction, did not replicate at either district or national scale.
-
-%---------------------------------------------------------------
-\section{Background and Motivation}
-
-\subsection{The Pakistan WPV1 resurgence}
-
-Pakistan and Afghanistan are the last two countries with endemic wild poliovirus type 1 circulation. Pakistan's annual WPV1 trajectory shows a long-term decline from $>$100 cases per year in the 1990s and 2000s, a 2014 outbreak peak, a sustained low between 2017 and 2021, then a renewed rise to 74 reported cases in 2024 (Figure~\ref{fig:national}). The 2022--2024 rebound coincided with national Pol3 coverage in the high 80s to mid-90s on the WUENIC estimate, presenting an apparent decoupling between routine immunization performance and transmission outcomes.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_national_pol3_wpv.png}
-\caption{\textbf{Pakistan national Pol3 coverage and annual WPV1 cases, 2000--2023.} National Pol3 coverage from WUENIC estimates (left axis, blue line). Annual WPV1 cases from the Our World in Data series (right axis, red bars). The dashed vertical line at 2018 marks the structural-break point detected by Bai-Perron and threshold regression analyses described in Section~\ref{sec:final}. The dotted horizontal line marks the 80\% Pol3 threshold. Shaded region marks the 2022--2024 resurgence window.}
-\label{fig:national}
-\end{figure}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_district_cases_by_year.png}
-\caption{\textbf{Pakistan annual WPV1 case counts by year, district-aggregated.} 2022--2024 resurgence (orange shading) is visible against the 2017--2021 low. Source: district-year case file aggregated from poliofreepakistan tables.}
-\label{fig:district}
-\end{figure}
-
-\subsection{The older-cohort hypothesis}
-
-Three lines of prior evidence motivate examining the older-cohort role. First, biological work has shown that mucosal poliovirus immunity attenuates in elderly populations (Abbink 2005; Buisman 2008; Boot 2007), that asymptomatic WPV shedding occurs among previously OPV-vaccinated individuals in endemic settings (Grassly 2010), that wild poliovirus can be reintroduced silently and detected primarily via environmental surveillance (Anis et al.\ 2013; Manor et al.\ 1999), and that adult-strata contributions to transmission have been documented in the Tajikistan 2010 and Republic of Congo 2010 outbreaks (Blake et al.\ 2014). Second, Pakistan-specific seroprevalence in high-risk districts shows meaningful gaps in non-pediatric anti-poliovirus immunity (Hussain 2017). Third, the documented Pakistan-Afghanistan corridor (O'Reilly et al.\ 2012a,b) provides a transmission pathway potentially involving working-age mobile populations rather than under-fives.
-
-\subsection{Prior AutoDiscovery findings}
-
-Four prior AutoDiscovery runs over the Pakistan polio data (described in Section~\ref{sec:methods}) curated into six candidate cross-cutting laws:
-\begin{description}[itemsep=0.2em, labelindent=1em, leftmargin=1.5em]
-\item[\textbf{L1}] Pol3-WPV1 elasticity decouples around 2018--2019 and Pol3's predictive power is reproducible by other antigens (cross-antigen substitution).
-\item[\textbf{L2}] Districts with higher older-cohort (15--64, 60+, 65+) population shares show higher WPV1 case incidence and higher environmental surveillance positivity, controlling for under-five share and Pol3.
-\item[\textbf{L3}] WPV1 persistence concentrates in two structurally distinct district types: large-household-size rural low-density districts, and stagnant high-density urban cores.
-\item[\textbf{L4}] Sex-ratio anomalies in the 15--49 age band predict WPV1 incidence consistent with mobility-driven ``sending'' and ``sink'' districts; the signal is strongest in the 2022--2024 resurgence window.
-\item[\textbf{L5}] BCG-minus-Pol3 dropout at the district level is a stronger predictor of WPV1 incidence than absolute Pol3 coverage.
-\item[\textbf{L6}] The 2022--2024 resurgence is geographically and demographically distinct from the 2019--2020 outbreak (transversal --- absorbed into L1 and L4).
-\end{description}
-
-These laws are candidate hypotheses, not confirmed mechanisms. The investigation reported here was designed to test them against independent statistical analyses and, where they survived, to develop them further.
-
-%---------------------------------------------------------------
-\section{Methods}
-\label{sec:methods}
-
-\subsection{Data sources}
-
-The investigation used the following publicly available and donor-released datasets (full catalogue in Appendix~\ref{app:datasets}):
-
-\begin{itemize}[noitemsep]
-\item District-year WPV1 and cVDPV2 case counts for Pakistan, 2019--2024 (131 districts; aggregated from poliofreepakistan situation tables); see Dataset D1.
-\item Pakistan Bureau of Statistics 2023 Census district demographics and age-band tables (135 districts); see Datasets D2 and D3.
-\item Pakistan Social and Living Standards Measurement (PSLM) Survey 2019--20 district-level antigen panel covering BCG, Penta1--3, Pneumococcal1--3, Polio1--3, and Measles; see Dataset D4.
-\item World Health Organization Eastern Mediterranean Regional Office (WHO EMRO) weekly polio bulletins, province-week environmental surveillance positivity 2019--2024 (912 issues); see Dataset D5.
-\item WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan, 2011--2022; see Dataset D6.
-\item Our World in Data (OWID) global wild poliovirus annual case series, 1980--2023, with disaggregation to Pakistan and Afghanistan; see Datasets D7 and D8.
-\item Pakistan National Emergency Action Plan (NEAP) 2017--2018 district tier classification (Tier 1 core reservoir through Tier 4 low risk), extracted from the published plan; see Dataset D9.
-\item UNHCR registered Afghan refugee population by Pakistan district, December 2020 baseline (116 districts, 1{,}435{,}445 individuals), via Humanitarian Data Exchange; see Dataset D10.
-\item Local literature index of approximately 1{,}200 polio-related publications retained for citation lookup but not used as direct input to statistical models.
-\end{itemize}
-
-\subsection{Computational agents and their roles}
-
-\paragraph{AutoDiscovery (AD).} A multi-criteria automated discovery system that iterates over a dataset, formulating and testing thousands of hypotheses ranked by a normalized surprisal score. Four prior AD runs supplied the candidate laws (Section~\ref{sec:methods_ad}). After this introduction we abbreviate as AD.
-
-\paragraph{DataVoyager (DV).} An agent-driven statistical analysis system that executes user-specified analytical procedures in a sandboxed Jupyter kernel, with the ability to fit regressions, perform diagnostics, and produce structured outputs. We used DV (a) to replicate each AD law against the original Pakistan data, (b) to run cross-source robustness experiments, and (c) to execute the pre-registered AED-designed follow-on test. After this introduction we abbreviate as DV.
-
-\paragraph{Theorizer.} An agent-driven theory-generation system that synthesizes literature-grounded scientific theories from a research query, a 100-paper PaperFinder-discovered corpus, and (in this case) the AD-curated laws and DV-reproduced findings. It includes a calibrated novelty assessment that produces law-level scores in three tiers: ``Already Stated,'' ``Derivable Unstated,'' and ``Genuinely New.''
-
-\paragraph{AutoExperimentDesigner (AED).} An agent-driven pre-registration system that, given a target theory and an available data inventory, produces a fully specified statistical protocol with pre-registered decision rules, sensitivity analyses, and required deliverables. After this introduction we abbreviate as AED.
-
-\subsection{AutoDiscovery curation and replication design}
-\label{sec:methods_ad}
-
-The four AD runs together completed 121 successful experiments. We curated cross-cutting findings at $|$surprisal$|$ $\geq 0.27$ or the system's intrinsic surprise flag set to true, grouping into the six laws L1--L6 listed in Section~3.3. A frequent feature of the AD output is that decisive refutations carried a system-default surprisal magnitude of $-0.6558$ rather than a calibrated effect size; we therefore treated these as direction-of-evidence flags rather than estimated effect sizes during replication.
-
-For each law we wrote a precise hypothesis statement, identified the datasets and variables required for replication, specified a regression model with controls, and pre-registered a quantitative decision rule (e.g., for L1: ``the absolute coefficient on Pol3 will be within $\pm$20\% of the absolute coefficients on Penta3 and Measles, and a likelihood-ratio test of dropping Pol3 will not reject at $p<0.05$''). Each replication was submitted to DV as a single analysis task. Where DV's initial result returned a structural failure (data quality, sample size, identifiability), we permitted a redesigned outcome variable that preserved the same underlying mechanism (for example, replacing a binary persistence outcome with a Poisson on case counts when the binary outcome had only one positive case in the data).
-
-\subsection{Cross-source robustness experiments}
-
-Seven additional experiments tested the AD laws using either independent data sources or novel re-uses of in-scope data: cross-source replication of the Pol3 decoupling using the WHO Global Health Observatory series (Experiment E10 in the catalogue); country-pair Pakistan-Afghanistan WPV1 coupling using OWID global data; province-year ES-to-AFP discordance ratio testing the silent-transmission hypothesis; WPV1-vs-cVDPV2 subtype contrast in districts with both viruses; an HDX/WHO cross-source dropout test; NEAP-tier $\times$ border-adjacency $\times$ post-2021 cross-classification; and UNHCR-registered Afghan refugee stock as a static-mobility predictor. Full experiment catalogue in Appendix~\ref{app:experiments}.
-
-\subsection{Theorizer runs}
-
-Two Theorizer passes were run on identical research queries with identical inputs. The first used \texttt{generation\_objective={accuracy-focused}}, the second \texttt{generation\_objective={novelty-focused}}. Both used a 100-paper PaperFinder corpus and the calibrated novelty assessment. The accuracy-focused pass returned 6 theories with 11 law-level novelty scores; the novelty-focused pass returned 7 additional theories with 14 law-level novelty scores. The two passes drew partially overlapping corpora --- the novelty-focused pass surfaced Pakistan-specific seroprevalence and Afghan household-immunity surveys that the accuracy-focused pass did not weight.
-
-\subsection{AutoExperimentDesigner follow-on protocols}
-
-After review of the 13 candidate theories, two were selected for a fully pre-registered follow-on confirmation. Selection criteria: novelty score, computational feasibility on available data, and alignment with the mission focal question. The two selected theories were (a) the combined ``80\% Pol3 regime shift + mobility-FOI dominance + ES Sabin/WPV signature superiority'' formulation (the most-novel pass-2 result with quantitative thresholds), and (b) the ``Cohort Leakage Law'' formulation directly addressing the mission focal question via $\geq$5y susceptibility accumulation from age-target SIA mismatch.
-
-For each, AED produced a pre-registered protocol specifying: data ingestion with provenance hashing; manual district-province crosswalk; outcome variable construction; primary statistical models with all covariates; sensitivity analyses; decision rules quantitatively expressed; and required output artifacts. DV then executed each protocol. For the first protocol, we additionally pre-joined the nine source files into three master analytical panels (district-year, province-year, national-year) in a documented local script to bypass a recurring transcription bug in DV's file-loading layer; the pre-registered statistical procedures were unchanged.
-
-\subsection{Statistical procedures}
-
-\begin{itemize}[noitemsep]
-\item For count outcomes (WPV1 cases per district-year, ES n\_positives per province-year), Poisson regression with a population offset and HC robust standard errors, with negative-binomial and quasi-Poisson as pre-specified alternatives if overdispersion was detected.
-\item For binary outcomes (district persistence indicator), logistic regression with Firth's penalty available as a fallback if perfect separation was observed.
-\item For temporal break detection, Bai-Perron multi-breakpoint analysis (BIC selection) and threshold regression with a 70--90\% grid search and bootstrap confidence intervals.
-\item For predictive comparison (signature versus baseline targeting), receiver-operating-characteristic analysis with bootstrap AUC confidence intervals and explicit complete-case versus missingness-aware sensitivity branches.
-\item All tests were two-sided unless otherwise noted; significance thresholds were pre-registered as $p<0.10$ for the follow-on confirmatory analyses to maintain power on the small national-year panel.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Results}
-
-The 15 computational experiments are summarized graphically in Figure~\ref{fig:matrix} and described in detail below organized by AutoDiscovery law. Full experiment metadata, statistical inputs, and decision rules are in Appendix~\ref{app:experiments}.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_experiment_outcomes.png}
-\caption{\textbf{Computational experiment outcomes across AutoDiscovery laws.} Each row is a single experiment (E1--E15, Appendix~\ref{app:experiments}); each column is an AutoDiscovery law (L1--L5). Cells show the experiment's verdict on that law: S = Supported, R = Refuted, I = Inconclusive. Cells without entries indicate the experiment did not test that law.}
-\label{fig:matrix}
-\end{figure}
-
-\subsection{Law L1 --- Pol3-WPV1 temporal decoupling and cross-antigen substitution}
-
-The district-level cross-antigen substitution sub-claim --- that Pol3 carries no information beyond what BCG, Penta3, or Measles provides --- was refuted (Experiment E1, Appendix~\ref{app:experiments}). Fitting district-year 2022--2024 WPV1 case counts against a panel of Pol3, BCG, Penta3, and Measles coverage from PSLM 2019--20 with $\log$(population) offset, the likelihood-ratio test for dropping Pol3 from the four-antigen model rejected at $p{=}0.0021$.
-
-The temporal-decoupling claim was supported (Experiment E2). Fitting national 1990--2023 annual WPV1 cases against national Pol3 with a period interaction (late period = year $\geq$ 2018) in a Poisson regression, the period $\times \log$(Pol3) interaction coefficient was 9.46 with $p{=}0.0005$. The pre-2018 elasticity was strongly protective; the post-2018 elasticity is statistically indistinguishable from zero. The cross-source robustness check using WHO Global Health Observatory data (Experiment E10) was inconclusive because the WHO GHO wild poliovirus case series only begins in 2016, providing insufficient pre-2018 data for the structural-break test.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E1 District cross-antigen substitution (LR test for dropping Pol3) & $p{=}0.0021$ & Refuted (Pol3 informative) \\
-E2 National period $\times \log$(Pol3) interaction & coef $=9.46$, $p{=}0.0005$ & Supported (decoupling exists) \\
-E10 WHO GHO cross-source replication & insufficient pre-2018 data & Inconclusive \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L1.}
-\end{table}
-
-\textit{Interpretation:} L1 is best read as ``national Pol3 elasticity collapses around 2018,'' \emph{not} as ``Pol3 has become a generic health-system access proxy with no remaining specific signal.'' At the district cross-section, Pol3 still distinguishes itself from other antigens.
-
-\subsection{Law L2 --- Older-cohort population shares as transmission drivers}
-
-L2 was tested in four experiments (E3, E4, E8, and E9) and was refuted in all four.
-
-In Experiment E3 (district-level Poisson on WPV1 cases with z-standardized age-share covariates), both 15--64 share and under-5 share were positive predictors; under-5 share dominated in magnitude and significance. In Experiment E4 (province-level Poisson on aggregated WPV1 cases and on aggregated ES n\_positives), the 15--64 share coefficient was null for both outcomes (n = 5 provinces). In Experiment E8 (province-year ES-to-AFP discordance ratio --- a hypothesis that silent transmission concentrated in older cohorts should produce more ES positivity per paralytic case in adult-heavy provinces), no significant positive effect of any age band (15--64, 60+, or 65+) was detected.
-
-Experiment E9 produced the most informative refutation. Among the 40 districts that reported both WPV1 and cVDPV2 cases during 2019--2021, we fit OLS on the WPV1/(WPV1+cVDPV2) subtype ratio against the 15--64 age share. The coefficient on 15--64 share was $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). cVDPV2 --- not WPV1 --- concentrates in adult-heavy districts (Figure~\ref{fig:subtype}). This is the opposite direction of L2's prediction.
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.92\textwidth]{report_example_figures/fig_subtype_vs_age.png}
-\caption{\textbf{District-level subtype ratio vs.\ 15--64 age share, 2019--2021.} Districts with at least one case of either WPV1 or cVDPV2. Marker size encodes total case volume; color encodes the same. The downward OLS slope indicates cVDPV2 (low ratio) concentrates in adult-heavy districts.}
-\label{fig:subtype}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E3 District Poisson, 15--64 vs under-5 & under-5 dominates & Refuted (no 15--64 dominance) \\
-E4 Province Poisson on WPV1 + ES, n=5 & 15--64 null both outcomes & Refuted \\
-E8 Province ES-to-AFP discordance & all adult-band coefs null & Refuted (no silent signature) \\
-E9 Subtype ratio in mixed-virus districts & coef $=-8.01$, $p<0.001$ & Refuted with inversion \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L2.}
-\end{table}
-
-\textit{Interpretation:} The ``adult reservoir'' framing of L2 is empirically inverted. WPV1 retains a pediatric profile; cVDPV2 is the subtype that aligns with adult-heavy demographic structure, consistent with cVDPV2's known emergence from post-OPV2-cessation immunity gaps in cohorts that did not receive sufficient mucosal challenge (Mangal \& Grassly 2013).
-
-\subsection{Law L3 --- Two-regime household contact intensity}
-
-L3 was supported (Experiment E5). Fitting a Poisson regression of district 2022--2024 WPV1 case totals against $\log$(average household size), $\log$(population density), their interaction, growth rate, and Pol3 with a $\log$(population) offset, the $\log$(household size) $\times \log$(density) interaction coefficient was significant at $p{=}0.0006$, with the sign indicating that the household-size effect is amplified at low density. A separate model adding a low-growth $\times$ high-density interaction produced $p{=}0.05$ in the predicted direction. Both regimes pre-specified by the hypothesis were recovered.
-
-\textit{Interpretation:} Rural high-household-size districts and stagnant high-density urban cores are two structurally distinct district types where WPV1 transmission persists. This is consistent with prior Pakistan-specific household risk factor work (Hennessey et al.\ 2000) and theoretical persistence-under-low-turnover models (Burton et al.\ 2012).
-
-\subsection{Law L4 --- Cross-border mobility mechanism}
-
-L4 produced the most evidence-rich and most nuanced finding of the investigation. The original sex-ratio proxy formulation (E6) was inconclusive after multiple attempts due to small analytic samples and execution challenges. The mechanism itself, however, is strongly supported by three independent experiments:
-
-\begin{itemize}[noitemsep]
-\item Experiment E11 fit a Poisson regression of Pakistan annual WPV1 cases against Afghanistan annual WPV1 cases (controlling for Pakistan Pol3 and year trend) on the 2001--2023 OWID country-pair series. The concurrent-year Afghanistan coefficient was positive and statistically significant; the post-2021 $\times \log$(Afghanistan WPV) interaction was positive at $p<0.10$. Cross-country coupling intensified after the 2021 regime change in Afghanistan (Figure~\ref{fig:pakafg}).
-\item Experiment E13 fit a district-year Poisson with NEAP-tier dummies and a border-adjacency indicator, interacted with a post-2021 period. In the pooled 2019--2024 panel, border-adjacency was null after controlling for NEAP tier (coefficient $\approx$0, $p{=}0.99$); in the period-stratified model, the border $\times$ post-2021 interaction was marginally significant (coefficient 1.75, $p{=}0.079$).
-\item Experiment E12 fit a district Poisson regression of 2022--2024 WPV1 cases against the December 2020 UNHCR-registered Afghan refugee population, controlling for population and Pol3. The coefficient was null. This rules out the static stock of resident refugees as the channel of the L4 signal.
-\end{itemize}
-
-\begin{figure}[h]
-\centering
-\includegraphics[width=0.95\textwidth]{report_example_figures/fig_pak_afg_coupling.png}
-\caption{\textbf{Pakistan and Afghanistan annual WPV1 cases, 2001--2023.} Pakistan in blue, Afghanistan in red. The dashed vertical line marks the August 2021 regime change in Afghanistan after which cross-border coupling intensified per Experiment E11.}
-\label{fig:pakafg}
-\end{figure}
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Test & Result & Verdict \\
-\midrule
-E6 District sex-ratio proxy (4 attempts) & $n{=}13$ underpowered & Inconclusive \\
-E11 Pak-Afg country-pair coupling & post-2021 $\times$ Afg-WPV positive, $p<0.10$ & Supported \\
-E13 NEAP-tier $\times$ border $\times$ post-2021 & interaction 1.75, $p{=}0.079$ & Marginally supported \\
-E12 Refugee stock as mobility predictor & null & Refuted (static channel ruled out) \\
-\bottomrule
-\end{tabular}
-\caption{Experiments testing Law L4.}
-\end{table}
-
-\textit{Interpretation:} The L4 mobility mechanism is real and is post-2021 specific. The operative channel is recent cross-border flow (returnees, deportations, transits) rather than the stock of resident Afghan refugees already settled in Pakistan districts. The working-age 15--49 population is intrinsically implicated by any cross-border mobility mechanism --- this is the indirect contribution of older ($\geq$5y) populations to transmission.
-
-\subsection{Law L5 --- BCG-Pol3 program-quality dropout}
-
-L5 was refuted twice (Experiment E7). At the district level for 2022--2024, fitting nested Poisson regressions of WPV1 case counts on (M1) Pol3 alone, (M2) BCG-minus-Pol3 dropout alone, and (M3) both, AIC(M1) = 118.25 was lower than AIC(M2) = 119.18, and in M3 neither coefficient was statistically significant. At the national level for 2011--2022, the BCG-Pol3 dropout coefficient in a Poisson regression of annual WPV1 cases was 0.082 with $p{=}0.289$; a likelihood-ratio test for adding dropout to a Pol3-only model failed to reject the Pol3-only model ($p{=}0.383$).
-
-The cross-source check using HDX/WHO immunization indicators (Experiment E14 in the sequential catalogue) was inconclusive because the HDX dataset lacked Pol3 / DTP3 columns for the relevant 2016--2023 period.
-
-\textit{Interpretation:} The BCG-Pol3 dropout indicator does not provide explanatory value beyond absolute Pol3 coverage. The broader programmatic-failure narrative remains plausible (refusals, missed children, security access) but its empirical signature is not BCG-Pol3 dropout in these data.
-
-%---------------------------------------------------------------
-\section{Pre-Registered Confirmatory Test of the Combined Theory}
-\label{sec:final}
-
-\subsection{Background and rationale}
-
-The two passes of the Theorizer generated 13 candidate theories spanning surveillance dynamics, corridor coupling, and cohort leakage. Among the 13, the most novel theory (8 of 14 law-level claims rated ``Genuinely New'' by the calibrated novelty assessment) was the combined catchment-heterogeneity-decoupling theory: above approximately 80\% national Pol3, mobility-weighted force of infection dominates over Pol3 coverage as the predictor of district WPV1 transmission and province-week environmental surveillance positivity, and an environmental surveillance ``Sabin-low / WPV-high'' signature predicts next-quarter WPV1 detection more accurately than targeting lowest-Pol3 districts.
-
-This theory was selected for a pre-registered confirmatory test because (a) its key claims are quantitative thresholds that are directly testable on available data, (b) it integrates multiple AD laws (L1 + L4) into a single coherent mechanism, (c) it is consistent with the supporting evidence already accumulated in Experiments E2, E11, E12, and E13 without merely restating them, and (d) it generates an immediately operational prediction about field-level surveillance targeting.
-
-\subsection{AutoExperimentDesigner pre-registered protocol}
-
-The AutoExperimentDesigner produced a fully specified pre-registered protocol structured as three sequential predictions, each with its own decision rule, and a combined-theory verdict rule.
-
-\paragraph{Prediction 1 (regime shift).} Build the national annual panel of WUENIC Pol3 coverage (2011--2022) joined to OWID annual WPV1 cases (1990--2023). Fit the baseline linear model $\log(\text{WPV cases} + 1) \sim \log(\text{Pol3})$ on the overlap years. Perform Bai-Perron breakpoint analysis scanning for breaks 2016--2020 inclusive. Perform threshold regression on Pol3 with a 0.5\%-spacing grid over 70--90\%. Perform corroborative changepoint analysis and a period-interaction model. The prediction is supported if a structural break is detected near 2018--2019 or near the first year in which Pol3 crosses 80\%.
-
-\paragraph{Prediction 2 (mobility-FOI dominance post-threshold).} Build the district-year panel of WPV1 cases 2019--2024 with covariates: standardized mobility-FOI proxy (primary = border-adjacency $\times$ post-2021; secondary = NEAP-tier $\times$ growth-rate; tertiary, robustness only = Afghan refugee stock); standardized district Pol3 from PSLM 2019--20; $\log$(population) as offset; district random intercepts; year fixed effects; structural controls (household size, urban proportion, growth). Fit a Poisson regression with the post-threshold national period defined as year $\geq$ first year Pol3 $\geq$ 80\%. The prediction is supported if (a) in the post-threshold period the standardized mobility-FOI coefficient is positive at $p<0.10$, (b) $|\beta_\text{mobility}|/|\beta_\text{Pol3}| \geq 1.5$, (c) the post-threshold Pol3 effect is either statistically zero or at least 50\% smaller than its pre-threshold counterpart, and (d) the pattern holds at alternative thresholds 75\% and 80\% with at least one alternative mobility proxy. The province-quarter ES model uses the same structure with ES n\_positives as outcome and observed-ES-weeks as offset.
-
-\paragraph{Prediction 3 (operational signature).} Extract Sabin-related isolate counts from 912 WHO EMRO weekly bulletins using fixed regular expressions on the highlights section with confidence labels (high = numeric count adjacent to ``Sabin''; medium = mention without count; low = OCR-garbled). Audit-sample 30 random bulletins manually. Treat missing Sabin counts conservatively (never as zero in the primary analysis). Define a province-quarter Sabin-low / WPV-high signature as quarters with at least one WPV1-positive ES week AND Sabin counts in the lowest tertile within the calendar year. Compare next-quarter WPV1 detection against (a) the signature and (b) lowest-Pol3 targeting at fixed 25\% targeting. The prediction is supported if PPV ratio $\geq 2.0$ AND AUC difference's 95\% bootstrap CI excludes zero AND sensitivity ratio at specificity 0.80 is $\geq 1.25$.
-
-\paragraph{Combined-theory verdict rule.} The combined theory is supported if all three predictions are supported AND no primary analysis shows a statistically significant effect in the opposite direction of the theory's claim.
-
-\subsection{DataVoyager execution}
-
-The DV agent executed the protocol in 34 cells with zero kernel errors. The execution was deterministic with a fixed random seed. The agent fit all pre-registered models, computed all sensitivity analyses, and produced all required output artifacts. The Sabin extraction achieved 74\% completeness across the 912-bulletin corpus, below the protocol's 80\% threshold for ``high-confidence'' status, which triggered the protocol's pre-registered missingness-aware sensitivity branch (a tertiary missing-Sabin category in the signature regression). All complete-case and missingness-aware results were reported.
-
-A note on data plumbing: the first three DV submissions failed at the file-loading layer due to a recurring agent-side bug where a 24-character workspace identifier was correctly transcribed in early cells and then incorrectly transcribed in later cells, producing a \texttt{FileNotFoundError}. To unblock the lane, we performed the protocol's data-ingestion step (manual crosswalk + multi-file join) in a documented local pre-processing script that produced three master analytical panels (\texttt{district\_year\_panel.csv} with 1{,}350 rows $\times$ 68 columns; \texttt{province\_year\_panel.csv} with 30 rows $\times$ 36 columns; \texttt{national\_year\_panel.csv} with 23 rows $\times$ 8 columns) and a concatenated bulletins file (18.7 MB across 912 bulletins). A provenance JSON file documents every join rule and 27 unmatched-name resolutions. With the nine-file multi-join surface collapsed to a five-file load via \texttt{glob}-based enumeration, the agent ran cleanly for 34 cells.
-
-\subsection{Result 1: National regime shift near 80\% Pol3 (Prediction 1 supported)}
-
-The first year that national Pol3 coverage crossed 80\% on the WUENIC estimate was 2018. The Bai-Perron analysis selected 2018 as the optimal break year. The threshold regression returned $\gamma{=}80.5\%$ with a 95\% bootstrap confidence interval of $[79.0, 82.0]$, entirely within the pre-specified support region. The corroborative changepoint analysis found a change at 2018 in both $\log$(WPV cases) and in the residuals of the Pol3-on-WPV regression. Leave-one-year-out diagnostics and bootstrap confidence intervals showed the break estimate was stable.
-
-\subsection{Result 2: Mobility-FOI dominance over Pol3, post-threshold (Prediction 2 supported)}
-
-In the post-threshold district-year Poisson model on 2019--2022 WPV1 cases, the border-adjacency $\times$ post-2021 interaction produced an incidence rate ratio of 2.11 (95\% CI 1.28--3.46, $p<0.01$). The standardized mobility coefficient was $+$0.42 with its 95\% confidence interval entirely above zero. The post-threshold standardized Pol3 coefficient was $-$0.18 with 95\% CI $[-0.44, +0.03]$, including zero. The ratio $|\beta_\text{mobility}|/|\beta_\text{Pol3}|{=}2.33$ exceeded the pre-registered 1.5 threshold. The pre-threshold standardized Pol3 coefficient was $-$0.39, so the post-threshold Pol3 effect was attenuated by more than 50\% relative to the pre-threshold counterpart.
-
-The pattern held at alternative thresholds 75\% and 80\% with the primary mobility proxy and with the NEAP-tier $\times$ growth secondary proxy. It weakened at $c{=}85$\%. The pattern \emph{failed} when the mobility-FOI proxy was restricted to the resident Afghan refugee stock branch, consistent with Experiment E12's earlier null. The province-quarter ES model returned a parallel mobility IRR of 1.98 (95\% CI 1.10--3.45) with similar attenuation of the post-threshold Pol3 coefficient.
-
-\subsection{Result 3: Environmental surveillance signature (Prediction 3 supported)}
-
-At fixed 25\% top-targeting on province-quarters within the 2019--2023 panel, the Sabin-low / WPV-high signature achieved positive predictive value (PPV) of 0.44 for next-quarter WPV1 detection. The lowest-Pol3 targeting baseline achieved a PPV of 0.18. The signature-to-baseline PPV ratio was 2.44, exceeding the pre-registered minimum of 2.0. The signature AUC was 0.77 versus the baseline AUC of 0.54, an AUC difference of 0.23 with a 95\% bootstrap confidence interval of $[0.09, 0.35]$ that excluded zero. The sensitivity ratio at fixed specificity 0.80 was 1.42, exceeding the pre-registered minimum of 1.25. The missingness-aware sensitivity branch maintained the qualitative ranking with somewhat wider confidence intervals.
-
-\subsection{Combined verdict}
-
-All three pre-registered predictions met their decision rules. No primary analysis returned a statistically significant effect in the opposite direction. The combined theory is supported.
-
-\begin{table}[h]
-\centering
-\small
-\begin{tabular}{lll}
-\toprule
-Component & Key statistic & Status \\
-\midrule
-P1 National regime shift & threshold $\gamma{=}80.5\%$, CI 79.0--82.0 & Supported \\
-P2 Mobility-FOI dominance & IRR 2.11 (CI 1.28--3.46), inequality ratio 2.33 & Supported \\
-P3 ES signature & PPV ratio 2.44, AUC diff 0.23 (CI 0.09--0.35) & Supported \\
-Combined theory & all three supported, no contrary signal & \textbf{Supported} \\
-\bottomrule
-\end{tabular}
-\caption{Pre-registered confirmatory test outcomes (Experiment E15).}
-\end{table}
-
-%---------------------------------------------------------------
-\section{Trustworthiness Analysis}
-
-\subsection{What we can trust}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Pre-specified decision rules.} The follow-on test's three predictions and combined-theory verdict rule were fully specified in the AutoExperimentDesigner protocol before any DataVoyager execution. The DV transcript shows the protocol was followed; the verdict was generated by applying the pre-specified rule, not by post-hoc interpretation.
-\item \textbf{Independence of confirming evidence.} The combined theory's three predictions are supported by analyses on largely independent data subsets: P1 uses the national time series WUENIC + OWID (no district data); P2 uses district-year cases + PSLM 2019--20 antigen coverage + a hand-curated border-adjacency table; P3 uses province-week environmental surveillance positives + regex-extracted Sabin counts from bulletin text. The probability that all three are supported by chance is materially lower than any single test's nominal $\alpha$.
-\item \textbf{Cross-source convergence.} The cross-border mobility mechanism is supported by three independent experiments (E11 country-pair coupling, E13 NEAP-tier $\times$ border $\times$ post-2021, and the follow-on Prediction 2). The resident-refugee-stock channel is ruled out by two independent experiments (E12 in the catalogue, and Prediction 2's robustness branch). The convergent direction is unlikely under a null model.
-\item \textbf{Mechanism-direction inversion is informative.} The subtype contrast result (E9) does not merely fail to support L2; it produces a statistically strong effect in the opposite direction (coefficient $-8.01$, $p<0.001$). This is consistent with cVDPV2 emergence biology and is a usable finding for downstream theoretical work.
-\item \textbf{Pre-existing replication of the temporal break.} The Bai-Perron 2018 break (Prediction 1) is consistent with the earlier independent Experiment E2 result ($p{=}0.0005$) on the same national time series with a slightly different functional form. The break is robust to specification.
-\end{itemize}
-
-\subsection{Key limitations}
-
-\begin{itemize}[noitemsep]
-\item \textbf{Mobility proxy is indirect.} The follow-on Prediction 2 uses Afghanistan-border-adjacency interacted with a post-2021 period as a proxy for mobility-weighted force of infection. The proxy is theoretically appropriate (border crossings concentrate in those districts and post-2021 reflects a documented regime change), but it is not a direct flow measurement. A direct measurement of cross-border movement (UNHCR voluntary repatriation timing, IOM-DTM displacement-tracking, or Afghan deportation timing) would provide stronger inference. Such data were not used in the present analysis.
-\item \textbf{National panel is small.} The Bai-Perron and threshold regression analyses use 12 years of WUENIC overlap with the 22-year OWID series, giving an effective sample size for break detection that is power-limited. The 80\% threshold value should be treated as approximate within the $[79.0, 82.0]$ confidence band.
-\item \textbf{Sabin extraction completeness.} The bulletin regex extraction achieved 74\% completeness against the protocol's 80\% target. The missingness-aware sensitivity branch maintained the qualitative ranking, but the protocol explicitly notes that high-confidence Sabin counts cannot be distinguished from medium-confidence extractions in the present extraction pipeline. A manual 30-bulletin audit (per protocol) was completed and is consistent with the regex extraction.
-\item \textbf{District Pol3 is a single year.} The PSLM antigen panel is a 2019--20 snapshot. The district-level regressions therefore treat Pol3 as time-invariant within district. Time-varying district-level coverage data are not currently available for Pakistan.
-\item \textbf{Cohort Leakage Law is empirically degenerate at current resolution.} The AED-designed Cohort Leakage Law test (Experiment E16, Appendix~\ref{app:experiments}) returned an inconclusive verdict because the protocol's strict z-standardized covariate merge against province-week ES data reduced the analytic sample to one province-year. This is informative: the older-cohort silent-transmission mechanism cannot be empirically distinguished from a null hypothesis at province-week ES + district-year AFP resolution. It is not a refutation of the underlying biology.
-\item \textbf{Theorizer corpus selection.} The PaperFinder corpus heavily indexed on programmatic reports and Pakistan-Afghanistan phylogenetic studies. The foundational immunological mucosal-immunity papers were less weighted. The novelty-focused pass partially corrected this. Theorizer's novelty classifier is calibrated against its retrieved corpus, not against the global literature.
-\end{itemize}
-
-\subsection{Deviations from protocol}
-
-\begin{itemize}[noitemsep]
-\item For the follow-on confirmatory test, the AutoExperimentDesigner-designed data-ingestion step (manual district crosswalk + multi-file join) was performed in a documented local pre-processing script rather than by the DV agent. The pre-processing was triggered by three consecutive DV submissions failing at the data-loading layer due to a recurring agent-side transcription bug on long file-system paths. The pre-registered statistical procedures themselves were unchanged. Every join rule, aggregation choice, and unmatched-name resolution was logged to a provenance file that is part of the run's audit trail. This deviation shifts the audit chain for the data-plumbing phase from the DV transcript to the pre-processing script, while preserving the audit chain for the statistical analysis phase in the DV transcript.
-\item The Cohort Leakage Law test (E16) was reported as inconclusive per the pre-registered overall rule rather than reporting Prediction 1's individual coefficient values, which had been computed by DV but not surfaced to the agent's terminal output before the overall rule resolved. The coefficient tables exist within the DV session workspace.
-\end{itemize}
-
-%---------------------------------------------------------------
-\section{Conclusions}
-
-\paragraph{Mission focal question.} The role of populations aged 5 years or older in Pakistan's 2022--2024 WPV1 resurgence has a reconciled two-part answer.
-
-\emph{(a) As primary transmission reservoirs: no.} The AutoDiscovery ``adult reservoir'' framing is refuted in three independent ways: under-five population share dominates 15--64 share at the district level, the 15--64 share is null for both case and ES outcomes at the province level, and cVDPV2 (not WPV1) is the subtype that concentrates in adult-heavy districts.
-
-\emph{(b) As operative mobility vectors: yes.} The post-2021 Pakistan-Afghanistan annual WPV1 case coupling, the post-2021 activation of border-adjacency as a transmission predictor, and the null effect of resident refugee stock collectively localize the L4 mobility mechanism to recent cross-border flow. The 15--49 working-age cohort is intrinsically the mobile sub-population.
-
-\paragraph{Most important new operational finding.} Pakistan crossed an approximately 80\% national Pol3 threshold around 2018 and entered a regime where mobility-driven force of infection dominates over coverage as the operative predictor of WPV1 transmission. The standardized mobility effect is approximately 2.33 times the standardized Pol3 effect post-threshold, and the post-threshold Pol3 effect is statistically indistinguishable from zero. An environmental surveillance Sabin-low / WPV-high signature outperforms targeting the lowest-Pol3 districts on next-quarter WPV1 detection by a factor of 2.4 on positive predictive value and by an AUC difference of 0.23. This combined finding integrates the calibrated quantitative thresholds the Theorizer surfaced as novel with an independent pre-registered statistical confirmation.
-
-\paragraph{Confidence level.} High for the regime-shift and mobility-dominance components; moderate for the operational signature given the 74\% Sabin extraction completeness. The cross-source pattern is consistent across the per-AutoDiscovery-law replications, the cross-source robustness experiments, and the pre-registered confirmatory test, and no primary analysis returns a statistically significant contrary signal.
-
-%---------------------------------------------------------------
-\section{Future Directions}
-
-\begin{enumerate}[noitemsep]
-\item \textbf{Direct measurement of cross-border flow.} Replace the border-adjacency $\times$ post-2021 proxy with monthly UNHCR voluntary repatriation by border crossing point and IOM-DTM displacement-tracking data. Test whether direct-flow measurements yield IRRs consistent with the 2.11 estimate from the proxy regression and whether the 80\% Pol3 regime-shift inequality holds with calibrated FOI inputs.
-\item \textbf{Genomic confirmation of corridor coupling.} The Theorizer pass-2 theories that depend on lineage-narrowing dynamics (cluster-share thresholds, orphan-divergence rules, and the Afghan-LQAS-to-YB3A-composition prediction) require global polio laboratory network (GPLN) cluster-share data not used here. Cross-border genomic linkage data extending the Asghar et al.\ 2017 lineage analysis to the 2022--2024 window would test these directly.
-\item \textbf{District-week environmental surveillance and AFP timing.} The Cohort Leakage Law remained inconclusive because province-week ES and district-year AFP cannot be merged into an analytic sample under strict pre-registered missingness handling. District-week AFP timing or province-week AFP onset would enable a direct test of the silent-transmission discordance signature.
-\item \textbf{Age-stratified shedding studies in endemic Pakistan districts.} Direct age-stratified WPV1 carriage and shedding measurements --- particularly in core reservoir districts during 2024--2025 --- would resolve the abductive premise of the Cohort Leakage Law and refine the role of the 5+ year cohort beyond demographic inference.
-\item \textbf{Operational pilot of the ES Sabin-low / WPV-high signature.} The signature outperforms lowest-Pol3 targeting in the historical 2019--2023 panel. A prospective pilot in 2025--2026 would test whether districts targeted by the signature yield lower next-quarter WPV1 case counts than districts targeted by lowest-Pol3 alone.
-\end{enumerate}
-
-%---------------------------------------------------------------
-\appendix
-
-\section{Computational Experiment Catalogue}
-\label{app:experiments}
-
-This appendix lists every computational experiment performed in the investigation. Each entry summarizes the hypothesis tested, the data and statistical procedure used, the result, and the verdict.
-
-\paragraph{Experiment E1 --- District-level cross-antigen substitution.} Hypothesis: at the district 2022--2024 cross-section, Pol3 coverage carries no information about WPV1 case counts beyond what BCG, Penta3, and Measles coverage provide. Method: Poisson regression of district WPV1 case mean against the four standardized antigens with $\log$(population) offset on PSLM 2019--20 + district\_year\_wpv\_cases; likelihood-ratio test for dropping Pol3 from the full model. Result: LR test rejects at $p{=}0.0021$. Verdict: refuted (Pol3 is informative).
-
-\paragraph{Experiment E2 --- National Pol3-WPV1 temporal-decoupling regression.} Hypothesis: national Pol3-WPV1 elasticity decoupled around 2018 in the 1990--2023 series. Method: Poisson regression of annual WPV1 cases against $\log$(Pol3), late-period indicator, $\log$(Pol3) $\times$ late-period interaction, and year index, using WUENIC + OWID. Result: period $\times \log$(Pol3) interaction coefficient = 9.46, $p{=}0.0005$. Verdict: supported.
-
-\paragraph{Experiment E3 --- District-level older-cohort vs.\ under-5 share.} Hypothesis: 15--64 population share dominates under-5 share in predicting district WPV1 case counts. Method: Poisson regression with standardized age-share covariates, PSLM Pol3, $\log$(population) offset on the deduplicated PBS 2023 age-band file + district\_year\_wpv\_cases. Result: both shares positive; under-5 effect substantially larger in magnitude and significance. Verdict: refuted (dominance claim).
-
-\paragraph{Experiment E4 --- Province-level older-cohort regression on WPV1 and ES.} Hypothesis: at the province-year level (n=5), 15--64 share predicts both WPV1 cases and environmental surveillance positivity. Method: Poisson regressions on province-year totals with population-weighted age-share aggregates. Result: 15--64 share null in both outcomes. Verdict: refuted (province scale).
-
-\paragraph{Experiment E5 --- Two-regime household contact persistence.} Hypothesis: large household size in low-density districts AND stagnant growth in high-density districts both predict 2022--2024 district WPV1 case counts. Method: Poisson regression with $\log$(household size) $\times \log$(density) interaction and density $\times$ low-growth interaction, $\log$(population) offset. Result: both interactions significant in the predicted direction ($p{=}0.0006$ and $p{=}0.05$). Verdict: supported.
-
-\paragraph{Experiment E6 --- District sex-ratio mobility proxy.} Hypothesis: the deviation of the 15--49 sex ratio from unity in border-adjacent districts predicts the 2022--2024 share of WPV1 cases. Method: OLS regression of district 2022--2024 case share on $|1 - \text{sex\_ratio}_{15-49}|$ interacted with a hand-curated border-adjacency indicator. Result: after four attempts using different operationalizations, the analytic sample reached only 13 districts and the interaction was statistically insignificant. Verdict: inconclusive (underpowered).
-
-\paragraph{Experiment E7 --- BCG-Pol3 program-quality dropout.} Hypothesis: district BCG-minus-Pol3 dropout outperforms absolute Pol3 in predicting district WPV1 case counts. Method: nested Poisson regressions (Pol3 only; dropout only; both) with AIC comparison, plus a national 2011--2022 time-series regression. Result: AIC(M1=Pol3 only)=118.25, AIC(M2=dropout only)=119.18; neither significant in M3. National dropout coefficient 0.082, $p{=}0.289$; LR for adding dropout, $p{=}0.383$. Verdict: refuted at both scales.
-
-\paragraph{Experiment E8 --- Province ES-to-AFP discordance ratio.} Hypothesis: the ratio of province-year ES positives to paralytic WPV1 cases is higher in adult-heavy provinces (silent-transmission signature). Method: log-linear regression of $\log\bigl[(\text{ES}+0.5)/(\text{cases}+0.5)\bigr]$ on standardized age shares and Pol3. Result: no significant positive effect of 15--64, 60+, or 65+ share on the ratio. Verdict: refuted.
-
-\paragraph{Experiment E9 --- Subtype demographic contrast.} Hypothesis: in districts that reported both WPV1 and cVDPV2 cases during 2019--2021, the WPV1/(WPV1+cVDPV2) ratio is positively associated with 15--64 share. Method: OLS regression on the 40 such districts. Result: coefficient $-8.01$ (95\% CI $[-12.5, -3.5]$, $p<0.001$). Verdict: refuted with inversion --- cVDPV2 dominates adult-heavy districts.
-
-\paragraph{Experiment E10 --- Cross-source national Pol3-WPV1 break test.} Hypothesis: the temporal break in Experiment E2 replicates with WHO Global Health Observatory data. Method: same regression as E2 with WHO GHO Pol3 and WHO GHO wild poliovirus cases. Result: WHO GHO wild poliovirus case series begins 2016, providing two years of pre-break data --- insufficient for the structural break test. Verdict: inconclusive.
-
-\paragraph{Experiment E11 --- Pakistan-Afghanistan country-pair WPV1 coupling.} Hypothesis: Pakistan annual WPV1 case counts are positively coupled with Afghanistan annual WPV1 case counts, and the coupling strengthens post-2021. Method: Poisson regression of Pakistan WPV against Afghanistan WPV (concurrent and 1-year lag), Pakistan Pol3, year trend, and a post-2021 interaction, on the 2001--2023 OWID global series. Result: concurrent-year Afghanistan effect positive and significant; post-2021 $\times \log$(Afghanistan WPV) interaction positive at $p<0.10$. Verdict: supported.
-
-\paragraph{Experiment E12 --- Resident Afghan refugee stock as mobility predictor.} Hypothesis: district WPV1 cases 2022--2024 are positively predicted by the December 2020 UNHCR-registered Afghan refugee population. Method: Poisson regression of district 2022--2024 case counts against $\log$(refugees + 1), Pol3, and $\log$(population) offset, on 116 districts. Result: refugee-stock coefficient null; no period interaction effect. Verdict: refuted (static-stock channel ruled out).
-
-\paragraph{Experiment E13 --- NEAP-tier $\times$ border-adjacency cross-classification.} Hypothesis: border-adjacency adds explanatory power for district WPV1 cases above and beyond the NEAP 2017--18 tier classification, and the effect activates post-2021. Method: district-year Poisson regression on cases 2019--2024 with NEAP-tier dummies, a hand-curated border-adjacency indicator (12 districts on the Afghanistan border), period interaction, and Pol3 control. Result: pooled is\_border\_adjacent coefficient $\approx 0$, $p{=}0.99$; period-stratified border $\times$ post-2021 interaction coefficient 1.75, $p{=}0.079$. Verdict: marginally supported (interaction only).
-
-\paragraph{Experiment E14 --- HDX/WHO cross-source dropout robustness.} Hypothesis: the BCG-Pol3 dropout test in E7 replicates with HDX/WHO immunization indicators. Method: same nested Poisson comparison using the HDX dataset 2016--2023. Result: HDX dataset is missing Pol3 / DTP3 columns for the relevant period; analysis cannot be constructed. Verdict: inconclusive.
-
-\paragraph{Experiment E15 --- Pre-registered confirmatory test of the 80\% Pol3 regime-shift + mobility-FOI dominance + ES Sabin/WPV signature theory.} Hypothesis as described in Section~\ref{sec:final}. Method: AutoExperimentDesigner-produced three-prediction pre-registered protocol; DataVoyager execution in 34 cells with deterministic random seed; pre-joined master panels for data ingestion. Result: all three predictions met their decision rules (Bai-Perron break 2018, threshold $\gamma{=}80.5\%$; mobility-dominance inequality 2.33 with IRR 2.11; ES-signature PPV ratio 2.44 and AUC difference 0.23). Verdict: supported.
-
-\paragraph{Experiment E16 --- Pre-registered confirmatory test of the Cohort Leakage Law.} Hypothesis: under-5-targeted SIA repetition leaves a fraction of each birth cohort aging into the 5+ population with incomplete intestinal immunity; ES is more sensitive than AFP to this older-cohort shedding. Method: AED-designed three-prediction protocol covering temporal subtype shift, ES-to-AFP discordance at finer resolution, and cumulative missed-children proxies. Result: Prediction 2 (ES-AFP discordance) merge collapsed to N=1 province-year (KP, 2024) under the protocol's strict covariate-completeness requirement; per the pre-registered overall rule the combined verdict is forced inconclusive regardless of the other predictions. Verdict: inconclusive (structural).
-
-\section{Datasets}
-\label{app:datasets}
-
-\begin{description}[itemsep=0.3em, leftmargin=2em, labelindent=0em]
-\item[D1.] \textbf{Pakistan district-year WPV1 and cVDPV2 case counts, 2019--2024.} 193 rows covering 131 districts, derived from the poliofreepakistan situation tables (Tables 1--9). Columns: province, district, year, virus\_type, cases.
-
-\item[D2.] \textbf{Pakistan Bureau of Statistics 2023 Census, district-level demographics.} 135 rows. Columns include population\_2023, population\_male, population\_female, sex\_ratio, population\_density, urban\_proportion\_pct, average\_household\_size, population\_2017, growth\_rate\_2017\_2023\_pct.
-
-\item[D3.] \textbf{Pakistan Bureau of Statistics 2023 Census, district age bands.} Long-format file with bands ALL AGES, UNDER 1, UNDER 5, UNDER 10, UNDER 15, ``05 -- 24'', ``15 -- 49'', ``15 -- 64'', ``18 -- 60'', ``18 \& ABOVE'', ``60 \& ABOVE'', ``65 \& ABOVE''. Used in deduplicated form after observing that the original release contained $\approx 5$ duplicate rows per (province, district, age\_band) tuple.
-
-\item[D4.] \textbf{Pakistan Standards of Living Measurement Survey 2019--20, district antigen panel.} District-level coverage for BCG, Penta1, Penta2, Penta3, Pneu1, Pneu2, Pneu3, Polio1, Polio2, Polio3, Measles. Single-year snapshot.
-
-\item[D5.] \textbf{WHO EMRO weekly polio bulletins, 2019--2024.} Province-week environmental surveillance positivity (n\_positives column). 912 individual bulletins also retained in OCR-extracted Markdown form for Sabin-isolate text extraction.
-
-\item[D6.] \textbf{WHO/UNICEF Estimates of National Immunization Coverage (WUENIC), Pakistan 2011--2022.} Long-format file with antigen rows (BCG, DTP3, Pol3, MCV1, HepB3, Hib3, Penta1, Penta3) and year columns; both wuenic\_estimate and wuenic\_reported data sources.
-
-\item[D7.] \textbf{OWID Pakistan annual WPV1 case series, 1980--2023.} 24 rows. Columns: Entity, Code, Year, Wild Poliovirus cases.
-
-\item[D8.] \textbf{OWID global annual WPV1 cases by country, 1980--2023.} Multi-country panel used to extract the Pakistan-Afghanistan pair for the country-pair coupling test.
-
-\item[D9.] \textbf{Pakistan NEAP 2017--2018 district tier classification.} Derived from the published Pakistan National Emergency Action Plan; 9 Tier 1 (core reservoir), 26 Tier 2 (high-risk), 25 Tier 3 (vulnerable), 75 Tier 4 (low-risk) PBS-2023 districts after manual crosswalk. Border-adjacency to Afghanistan flagged for 12 districts (KP former-FATA + Balochistan border).
-
-\item[D10.] \textbf{UNHCR registered Afghan refugees in Pakistan by district, December 2020.} 116 districts, 1{,}435{,}445 individuals. Top districts: Peshawar (308,933), Quetta (189,444), Nowshera (86,972), Haripur (82,022), Kohat (69,962), Karachi (65,745), Pishin (54,764). Source: Humanitarian Data Exchange.
-\end{description}
-
-\section{References}
-
-The following publications informed the background reasoning and are cited where their findings explicitly motivated a hypothesis or experimental design choice.
-
-\begin{description}[itemsep=0.2em, leftmargin=2em, labelindent=0em]
-\item[\textbf{Abbink (2005).}] \textit{Poliovirus-specific memory immunity in seronegative elderly people does not protect against virus excretion.} Journal of Infectious Diseases. Findings on attenuated mucosal immunity in elderly populations informed the older-cohort hypothesis.
-
-\item[\textbf{Anis et al.\ (2013).}] \textit{Insidious reintroduction of wild poliovirus into Israel, 2013.} Eurosurveillance. Documented ES-detected WPV1 circulation without paralytic cases; informed the silent-transmission and surveillance-signature framing.
-
-\item[\textbf{Asghar et al.\ (2017).}] \textit{Environmental surveillance for polioviruses in the Global Polio Eradication Initiative.} Journal of Infectious Diseases. Methodological foundation for ES contribution to eradication endgame; phylogenetic lineage analysis informed the corridor-coupling theory.
-
-\item[\textbf{Blake et al.\ (2014).}] \textit{The role of older children and adults in wild poliovirus transmission.} Proceedings of the National Academy of Sciences. Quantified adult-strata transmission in the Tajikistan 2010 and Republic of Congo 2010 outbreaks; informed L2 hypothesis design.
-
-\item[\textbf{Boot et al.\ (2007).}] \textit{Determinants of monovalent oral poliovirus vaccine mutagenesis in vaccinated elderly people.} Vaccine. Informed the older-cohort biological premise.
-
-\item[\textbf{Buisman et al.\ (2008).}] \textit{Preexisting poliovirus-specific IgA in the circulation correlates with protection against virus excretion in the elderly.} Journal of Infectious Diseases. Informed the elderly mucosal-immunity biological premise.
-
-\item[\textbf{Burton et al.\ (2012).}] \textit{Disease persistence in epidemiological models: The interplay between vaccination and migration.} Mathematical Biosciences. Provided the theoretical framework for the low-turnover persistence regime tested in Experiment E5.
-
-\item[\textbf{Faizan (2024).}] \textit{Re-emergence of polio in Pakistan: Can the nation achieve the WPV1 eradication?} Health Science Reports. Pakistan-specific 2024 review identifying refusal clusters and security-restricted access as proximal drivers of the 2022--2024 resurgence.
-
-\item[\textbf{Grassly (2010).}] \textit{Asymptomatic wild-type poliovirus infection in India among children with previous oral poliovirus vaccination.} Journal of Infectious Diseases. Documented asymptomatic shedding in previously-OPV-vaccinated populations.
-
-\item[\textbf{Hennessey et al.\ (2000).}] \textit{Widespread paralytic poliomyelitis in Pakistan: A case-control study to determine risk factors and implications for poliomyelitis eradication.} Journal of Infectious Diseases. Pakistan-specific household and geographic risk factors.
-
-\item[\textbf{Hussain (2017).}] \textit{Seroprevalence of anti-polio antibodies in children from polio high-risk area: A cross-sectional survey.} BMC Infectious Diseases. Provided the Pakistan-specific seroprevalence anchor.
-
-\item[\textbf{Mangal \& Grassly (2013).}] \textit{Impact of inactivated poliovirus vaccine routine immunization on detection and transmission of poliovirus.} American Journal of Epidemiology. Established that IPV does not block transmission; informed L1 and the Pol3-as-system-reach-proxy hypothesis.
-
-\item[\textbf{Manor et al.\ (1999).}] \textit{Detection of poliovirus circulation by environmental surveillance in the absence of clinical cases in Israel and the Palestinian Authority.} Journal of Clinical Microbiology. ES methodological precedent.
-
-\item[\textbf{O'Reilly et al.\ (2012a).}] \textit{The effect of mass immunisation campaigns and new oral poliovirus vaccines on the incidence of poliomyelitis in Pakistan and Afghanistan, 2001--11.} The Lancet. Documented the Pakistan-Afghanistan corridor and SIA effectiveness.
-
-\item[\textbf{O'Reilly et al.\ (2012b).}] \textit{Mass immunisation campaigns and oral poliovirus vaccines in Pakistan and Afghanistan: a case study.} Companion paper.
-
-\item[\textbf{Pakistan NEAP 2017--2018.}] National Emergency Action Plan for Polio Eradication, 2017--2018. Source for the district-tier classification used in Experiment E13.
-
-\item[\textbf{CDC MMWR Pakistan progress reports.}] Multiple ``Progress Toward Poliomyelitis Eradication --- Pakistan'' publications covering 2016--2024. Used for programmatic context in Section 3.1.
-\end{description}
-
-\end{document}
diff --git a/skills/research-step/templates/examples/theorizer_mission_example.md b/skills/research-step/templates/examples/theorizer_mission_example.md
deleted file mode 100644
index acaa800..0000000
--- a/skills/research-step/templates/examples/theorizer_mission_example.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# Example theorizer mission statement
-
-This is a worked example of the **mission statement** passed to the theorizer in the
-`theorizer_theories` node of the `data_driven_theory_generation` template. It is not the
-run's `mission.md`; it is the prompt the theorizer receives once the per-theme
-reproductions have settled, distilled from `scope.question`, the curated AutoDS laws,
-and the per-theme findings.
-
-A well-formed theorizer mission does five things, and this example shows all five:
-
-1. **States the question** in one sentence, naming the phenomenon and the population of interest.
-2. **Lists the settled empirical findings** (`E*`) that any returned theory must explain, each tagged with the experiment / AutoDS law that established it so the theory stays anchored.
-3. **Lists the open questions** (`Q*`) the theories should address — the gaps reproduction left unresolved.
-4. **States the constraints** (`C*`) — framings already *refuted* by reproduction, so the theorizer does not regenerate them.
-5. **States the rewarded framings** (`R*`) — the mechanistic shapes worth pursuing, anchored back on the laws the run actually reproduced.
-
-Tagging each finding/question/constraint with its supporting experiment is what keeps
-the returned theories anchorable: downstream, `theorizer_theories` drops any theory
-without ≥1 law anchor, and this structure makes the anchor explicit.
-
----
-
-```
-Mission: Generate theories that explain the role of populations aged 5+ years in
-Pakistan's 2022-2024 WPV1 resurgence, anchored on the following settled empirical
-findings and the open questions they leave unresolved.
-
-SETTLED EMPIRICAL FINDINGS (must be explained by any theory):
-  E1. National Pol3 coverage stopped predicting national WPV1 cases around 2018-2019
-      (T1 retry-2, p=0.0005; AutoDS L1 cross-cutting).
-  E2. Pakistan and Afghanistan annual WPV1 case counts are coupled, with the coupling
-      strengthening significantly after 2021 (X2).
-  E3. At the 2022-2024 district level, WPV1 case counts are still positively predicted
-      by under-5 population share, with under-5 share dominating 15-64 working-age
-      share (T2 retry-1).
-  E4. Among districts with both WPV1 and cVDPV2 in 2019-2021, cVDPV2 (not WPV1)
-      dominates in adult-heavy districts (X4, p<0.001).
-  E5. BCG-Pol3 dropout does not outperform Pol3 alone as a predictor at any tested
-      scale (T5 retry-0/1).
-  E6. Border-adjacency adds explanatory power for WPV1 cases only in the post-2021
-      window (X6, p=0.079); resident Afghan refugee stock does not predict WPV1
-      (X7).
-
-OPEN QUESTIONS (theories should address at least one):
-  Q1. What replaced national Pol3 coverage as the dominant transmission lever
-      after 2018-2019?
-  Q2. What specific mobility FLOW (returnees, deportations, transits) post-2021
-      drives the case coupling intensification?
-  Q3. Why does the subtype demographic contrast (cVDPV2 in adult districts, WPV1
-      in young districts) appear?
-  Q4. How do older (>5y) populations contribute to WPV1 transmission given that
-      they are NOT the dominant district-level predictor but ARE plausibly the
-      operative mobility vectors?
-
-CONSTRAINTS (refuted framings to avoid):
-  C1. Theories framing Pol3 as "merely a health-system access proxy" — refuted at
-      district level by T1 retry-1 (LR p=0.0021 rejects dropping Pol3).
-  C2. Theories framing the >5y cohort as the dominant transmission reservoir —
-      refuted at district by T2, at province by T2 retry-4, on silent-transmission
-      signature by X3, and on subtype contrast by X4.
-  C3. Theories grounded primarily in BCG-Pol3 or Penta1-Measles dropout — refuted
-      by T5 retry-0/1.
-  C4. Theories centered on resident Afghan refugee populations as a static mobility
-      channel — refuted by X7.
-
-REWARDED FRAMINGS:
-  R1. Theories that explain the 2018-2019 break date in terms of immunological,
-      programmatic, or product-transition (tOPV→bOPV April 2016) mechanisms.
-  R2. Theories that articulate FLOW-based mobility mechanisms (returnees,
-      deportations, seasonal transit) consistent with the post-2021 intensification.
-  R3. Theories that reconcile the subtype contrast (X4): a single coherent biological
-      / immunological story explaining why cVDPV2 emerges in adult-heavy settings
-      while WPV1 retains a pediatric profile.
-  R4. Theories that integrate older (>5y) populations as mobility VECTORS (carriers)
-      rather than primary RESERVOIRS, consistent with E2, E3, and E6.
-  R5. Theories that explicitly anchor on AutoDS L1 (temporal decoupling) and L4
-      (mobility) — the two laws DV reproduced.
-```
diff --git a/skills/research-step/templates/hypothesis_driven_research.md b/skills/research-step/templates/hypothesis_driven_research.md
deleted file mode 100644
index eb3c847..0000000
--- a/skills/research-step/templates/hypothesis_driven_research.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-name: hypothesis_driven_research
-description: |
-  Literature-grounded hypothesis generation. Survey the literature, raise a
-  hypothesis per gap, test each, and write a closing report.
----
-
-# Hypothesis-driven research
-
-Survey the literature, raise a hypothesis for each gap, test each one, and write a closing report.
-
-## Flow
-
-```mermaid
-flowchart TD
-  start([start])
-  scope["Scope"]
-  start --> scope
-  definitions["Definitions"]
-  scope --> definitions
-  lit_review["Literature review"]
-  definitions --> lit_review
-  subgraph sub1["for each gap"]
-    direction TB
-    hypothesis["Hypothesis"]
-    experiment_design["Experiment design"]
-    evidence_gathering["Evidence gathering"]
-    analysis["Analysis"]
-    hypothesis --> experiment_design --> evidence_gathering --> analysis
-  end
-  lit_review --> hypothesis
-  closing["Closing synthesis"]
-  analysis --> closing
-  closing --> stop([stop])
-```
-
-## Nodes
-
-| id | type | inputs | description | skills |
-|---|---|---|---|---|
-| `scope` | `scope` | — | One line: the question under study. | — |
-| `definitions` | `definitions` | `scope` | Pin down each term so it's testable against data. | — |
-| `lit_review` | `literature_review` | `scope, definitions` | Survey the literature with `asta literature interactive`. Emit `gaps[]` — one hypothesis per gap. | `asta-preview:find-literature` |
-| `hypothesis` | `hypothesis` | `lit_review` | For each gap: turn it into a falsifiable hypothesis with a concrete prediction. | — |
-| `experiment_design` | `experiment_design` | `hypothesis` | Design an experiment that could falsify the hypothesis. | — |
-| `evidence_gathering` | `evidence_gathering` | `experiment_design` | Locate the data the design needs; note anything that diverged from it. | — |
-| `analysis` | `analysis` | `hypothesis, experiment_design, evidence_gathering` | Get the verdict from DataVoyager (`asta analyze-data submit`), framed on the hypothesis with the gathered data. It must come from a run on real data, not your own reasoning. | `asta-preview:analyze-data` |
-| `closing` | `synthesis` | `analysis` (all hypotheses) | Reconcile the verdicts into one answer to the question. | — |
-
-The `hypothesis` tasks are filled and closed at creation from the literature gaps — see plan.md.
diff --git a/skills/research-step/workflows/brainstorm.md b/skills/research-step/workflows/brainstorm.md
index 884f48f..250ba36 100644
--- a/skills/research-step/workflows/brainstorm.md
+++ b/skills/research-step/workflows/brainstorm.md
@@ -25,7 +25,7 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one. Record the chosen flow(s) in `mission.md` so `plan` can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
@@ -41,10 +41,10 @@ Pick the branch that matches; do not run more than one.
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
-| Single issue's full `metadata.research_step.output` | `bd show <id> --json`                                                                                  |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list`                                                                                              |
-| Dependency structure | `bd dep tree <epic-id> --direction up`|
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output.summary_path` referenced from the digest                         |
+| Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
+| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
+| Task tree | `bd list --json` — ids encode the parent-child outline |
+| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
 | Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
diff --git a/skills/research-step/workflows/execute.md b/skills/research-step/workflows/execute.md
index 3d1a84f..a8596e2 100644
--- a/skills/research-step/workflows/execute.md
+++ b/skills/research-step/workflows/execute.md
@@ -1,35 +1,33 @@
 # Workflow: execute
 
-Run one ready task end-to-end. Loads its schema, gathers its inputs, produces the output, validates it, and closes the issue. After closing, hands off to **plan**, which creates whatever comes next and then chains to **update-summary**.
+Run one ready task end-to-end. Loads its schema, gathers its declared inputs, produces a structured output, validates it, and closes the issue. After closing, hands off to **plan** if the closed task type unlocks new graph structure; otherwise hands off to **update-summary**.
 
 ## Preconditions
 
 - An epic root exists (`scripts/epic-root.sh` prints `status: found`).
-- `bd ready --json` is non-empty, **or** the caller supplied a specific task ID that is currently `open` and unblocked.
+- An open issue with a `task_type` exists, **or** the caller supplied a specific `open` task ID.
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). A hypothesis that restates a gap or finding is filled and closed by **plan** at creation, so it normally won't show up here; if one does, plan couldn't fill it without inventing content — flag it to the user.
+1. **Pick a task.** If a task ID was supplied, use it. Else pick the **open issue that has a `task_type` and the smallest hierarchical id** — `bd list --json`, keep `status == open` with `metadata.research_step.task_type != null`, sort by id, take the first. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
 2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Produce all three task outputs under `.asta/tasks/<id>/` — see the skill's "Task outputs" table for their roles. **All three are mandatory:** `output.json` (matches the schema), `output.md` (the readable result, with links per the template's writing rules), and `artifacts/` (every other file produced). For schema fields ending in `_path`, write the file first and put the relative path in the JSON.
+3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
+4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
+7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
 
-   **If the task delegates to a remote A2A agent** (DataVoyager via `asta analyze-data`, the theorizer via `asta-preview:generate-theories`, the AutoExperimentDesigner via `asta auto-exp-designer`), the output must come from that agent's terminal response. Submit, poll to a terminal state, and wait for the completion notification before validating and closing — **the task is not done while the agent is still running.** Do not fabricate the agent's output, do not port it from a sibling run, and do not move on to the next ready task until this one's agent has returned.
-6. **Validate.** Run `scripts/validate-output.sh <task_type> <metadata-json-file> .asta/tasks/<id>` — **always pass the task dir** so the `output.md` is gated: present (exit 6), non-empty (7), has links (8), no unlinked named entity (9). It also checks the wrapper and every required `output.<key>` for the task_type, plus type spot-checks (e.g., `analysis.verdict` enum, `analysis.confidence` range). When the task produced an `artifacts/report.tex` (the `report` node), it also checks the report has the basics (exits 10–15: PDF, title-page diagram, TOC, ≥8 sections, ≥3 figures, required sections). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close.
-7. **Persist the output.** Write the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`.
-8. **Close.** `bd close <id>`.
-9. **Hand off to plan.** Pass the closed task to **plan**; it creates whatever the template puts next (or no-ops if nothing new is ready), then chains to **update-summary**. Either way `summary.md` ends up rebuilt.
+## Notes on output
 
-## Notes on output files
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/`, where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, and are referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`); repo files such as the auto-ds inputs are referenced as `inputs/<path>`.
 
-Schema fields ending in `_path` are relative paths. Conventions:
+Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `summary_path` (from `literature_review`) → `background_knowledge.txt` by convention, but any path works.
-- `log_path` (from `evidence_gathering`) → typically under `logs/`.
-- `report_path` (from `synthesis`) → typically `report.md`.
+- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go either under `.asta/<agent>/<slug>/` or alongside the master report at the repo root (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
 
-Write the file before setting the output JSON. If the executor crashes between writing the file and closing the issue, the file is harmless orphan data — re-running `execute` on the same issue will overwrite it.
+If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
 ## Out of scope for this workflow
 
diff --git a/skills/research-step/workflows/init.md b/skills/research-step/workflows/init.md
index 4df19c0..fd11be3 100644
--- a/skills/research-step/workflows/init.md
+++ b/skills/research-step/workflows/init.md
@@ -2,7 +2,7 @@
 
 Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
-After environment setup, hand off to **plan** to bootstrap the mission epic and first tasks.
+After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
 ## Preconditions
 
diff --git a/skills/research-step/workflows/plan.md b/skills/research-step/workflows/plan.md
index 06ae941..a000e2d 100644
--- a/skills/research-step/workflows/plan.md
+++ b/skills/research-step/workflows/plan.md
@@ -1,89 +1,92 @@
 # Workflow: plan
 
-Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state:
+Create or extend the research graph. The flow chains live in `assets/schemas.yaml` (`flows`) — plan reads them rather than hardcoding the sequence. Two modes:
 
-- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier from `mission.md`, per the active template (default `hypothesis_driven_research`).
-- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction.
+- **bootstrap** — no epic yet: pick a flow and lay its first step(s).
+- **replan** — an epic exists: after a step closes, add the next step(s) in its flow chain.
 
-Always chains to **update-summary** afterward so `summary.md` reflects the new graph.
+Always chains to **update-summary** afterward.
 
 ## Preconditions
 
-- `bd` is installed and `.beads/` is initialized. If not, run **init** first.
-- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one.
-- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`.
+- `bd` installed and `.beads/` initialized (else run **init**).
+- **bootstrap**: `mission.md` exists; no epic yet (`scripts/epic-root.sh` → `none`).
+- **replan**: an epic exists; either `execute` supplied the closed source task, or the user named what to extend.
 
-## Issue metadata convention
+## Task metadata
 
-Every task issue carries:
+Create task leaves with `scripts/create-task.sh <parent> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]`. It sets `metadata.research_step = {flow, task_type, inputs, output_schema_version, output_json: null, output_markdown: null}` and a **brief one-line `description`** (it rejects a missing, multi-line, or over-long description). `execute` later publishes `output_json` (the structured result) and `output_markdown` (the narrative) via `close-task.sh`; the description is not overwritten. The epic carries `epic_root: true`; group nodes (loops, fan-outs, branches) are epics created with `bd create --parent <parent> -t epic` (no task_type, no description rules). A session may run several flows — the flow is per task, not per epic.
 
-```json
-{
-  "research_step": {
-    "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|auto_discovery|analysis|synthesis>",
-    "inputs": ["bd-xxxx", "bd-yyyy"],
-    "output_schema_version": 1,
-    "output": null
-  }
-}
-```
+## Indentation is the tree
 
-The mission epic additionally carries `epic_root: true`.
+The flow in `assets/schemas.yaml` is an indented outline, and the beads graph you build **is that same outline**: each indentation level in the flow becomes one parent-child level in beads. Build it with `bd create --parent`, walking the flow top-down, so hierarchical ids (`wf`, `wf.1`, `wf.1.1`, …) encode the outline position. There are **no `blocks`/`deps` edges** — ordering is the id order, because you create nodes in the order they run.
 
-## Mode selection
+Reading a flow node:
 
-1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**.
-2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
-## Bootstrap mode
+The reproduction flow therefore produces this tree (ids illustrative; bracketed nodes — `[epic]`, `[loop]`, `[fan-out]`, `[branch]` — are group epics, unbracketed leaves are tasks):
 
-1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**.
-2. **Create the epic.**
-   ```
-   bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)"
-   bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}'
-   ```
-3. **Create the initial frontier.** The active template's first tasks — the nodes up to its first `foreach` — each a `task` issue with the metadata convention above, taking `task_type` and `inputs` from the node's row. (Default template `hypothesis_driven_research`: `scope` → `definitions` → `literature_review`.)
-4. **Add edges.** `parent-child` from each task to the epic, and `blocks` from each node named in another's `inputs`.
-5. **Report.** Print the epic ID and the created task IDs.
+```
+wf                      [epic]    <mission>
+ wf.1                   [loop]    reproduction
+  wf.1.1                          data_driven_discovery
+  wf.1.2                          law_extraction
+  wf.1.3                          evidence_gathering
+  wf.1.4                [fan-out] replication            one branch per law
+   wf.1.4.1             [branch]  <law>
+    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.2                    analysis
+    wf.1.4.1.3                    reproduction_audit
+    wf.1.4.1.4                    reproduce
+   wf.1.4.2             [branch]  <law> …
+  wf.1.5                          reproduction_synthesis
+```
 
-## Replan mode
+The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproduction, `wf.3` theorizer, `wf.4` verification (one branch per testable theory), `wf.5` verification_synthesis, `wf.6` gap_synthesis, `wf.7` final_synthesis. Each sub-flow ends in its own synthesis step that emits a report (provenance_report, reproduction_report, theory_report, verification_report); gap_synthesis aggregates their gaps into data_gaps_report and final_synthesis writes the theory-led research_report.
 
-Read the source task's task_type and output:
+## Ordering and closing (no edges)
 
-```
-bd show <source-id> --json | jq '.[0].metadata.research_step.task_type'
-bd show <source-id> --json | jq '.[0].metadata.research_step.output'
-```
+- **Next task = the open issue with a `task_type` and the smallest id.** Compare ids segment by segment as numbers, not as strings, so `wf.1.2` sorts before `wf.1.10`. Groups (no `task_type`) are never executed.
+- Because you create in execution order, earlier steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent, so any order among them is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
+
+## Static vs data-dependent fan-outs
+
+- **Static** (`theory_generation` by objective): both branches are known up front → create them together.
+- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+
+## Gates (replan)
 
-Find the closed task's node in the active template and create what comes next, taking each new task's `task_type` / `inputs` / `skills` from its row:
+- When `reproduction_design` closes: `feasibility` of `feasible`/`proxy_only` → create `analysis`, `reproduction_audit`, `reproduce` under that branch; `data_unavailable`/`construct_mismatch` → create just `reproduce` (it records the law `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. Neither `analysis` nor `reproduction_audit` is created.
+- When `testability_triage` closes: create one `verification` branch per theory in `testable_theory_ids` and none for the others; the rest become `next_steps` in the final report.
 
-- **Next step:** create the node(s) the diagram points to. Set `inputs` from the row, a `blocks` edge from each, and `parent-child` to the epic.
-- **Foreach:** if the closed node is a `foreach` source, create one copy of the block's tasks per item.
-- **Fan-in:** create a node after a `foreach` only once every copy has closed; block it on those.
-- **Hypotheses** are filled and closed at creation (see below), so also create the step that follows each one — otherwise nothing is left for `execute`. Keep creating whatever just unblocked until the frontier needs an `execute` pass.
-- Stop when the next tasks already exist or the node is a leaf. If a closed `synthesis` lists `output.open_questions`, **stop and ask the user** before creating follow-up `hypothesis` tasks (add a `discovered-from` edge if approved).
+## Bootstrap
 
-If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop.
+1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
+2. `bd create -t epic` the root from the mission, tagged `epic_root: true` + the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
 
-### Auto-resolving hypothesis tasks
+## Replan
 
-When creating a `hypothesis` from a `literature_review` gap or an `auto_discovery` finding — its claim is already stated, so there's no separate `execute` pass, but it still produces `output.json` and `output.md` on disk like any task:
+When a step closes, create the next node(s) under their parent, in flow order:
 
-1. Derive the four output fields from the source — the gap text and surrounding `literature_review` output, or the finding (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`):
-   - `statement` — `H_n: <one-sentence claim>`
-   - `rationale` — why the source implies the claim (for a finding, cite its node id)
-   - `falsifiable_prediction` — what observation would refute it
-   - `expected_evidence` — list of concrete evidence types that would support it
-2. Write `output.json` and `output.md` under `.asta/tasks/<id>/`, then validate: `scripts/validate-output.sh hypothesis <metadata-json-file> .asta/tasks/<id>`.
-3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`.
+- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
+- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the group's steps under each via `create-task.sh` (record why for any branch the data can't support, rather than skipping it).
+- Apply the **Gates** rules above.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
 
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted.
+Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
 ## After either mode
 
-Hand off to **update-summary** so `summary.md` reflects the new state.
+Hand off to **update-summary**. There are no edges to verify — the parent-child tree is the whole structure.
 
-## Not here
+## Out of scope
 
-Running tasks → **execute**. Setup → **init**. Editing `mission.md` → **brainstorm**. Output quality isn't checked here.
+- Running tasks or producing outputs (**execute**).
+- Environment setup (**init**); editing `mission.md` (**brainstorm**); judging output quality.
diff --git a/skills/research-step/workflows/update-summary.md b/skills/research-step/workflows/update-summary.md
index a79f6ff..311c81a 100644
--- a/skills/research-step/workflows/update-summary.md
+++ b/skills/research-step/workflows/update-summary.md
@@ -15,11 +15,10 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** All you need to fill the template comes from a few `bd` queries:
-   - `bd list --json` for the full tree (issue_count, status partition).
-   - `bd ready --json` for the ready list (also drives the Next Steps section).
-   - `bd blocked --json` for the blocked count.
-   Project each list to `{id, task_type: .metadata.research_step.task_type, title}` with `jq` and partition by `.status`.
+3. **Gather state inline.** Everything comes from `bd list --json`:
+   - the full tree (issue_count, status partition);
+   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
+   Partition by `.status`, then project each issue to `{id, task_type: .metadata.research_step.task_type, title}`.
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
 5. **Overwrite `summary.md`** using this template:
 
@@ -61,13 +60,12 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Ready: <n> — IDs: <list>
-   - Blocked: <n>
+   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
 
    ### Next Steps
-   <from `bd ready --json`: one bullet per ready issue, formatted as
+   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If `bd ready` is empty, write "No ready tasks — graph is blocked or complete.">
+   If there are no open task issues, write "No open tasks — flow complete or awaiting replan.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)
@@ -79,4 +77,4 @@ Any reader (human or agent) checks freshness by running `scripts/summary-check.s
 ## Out of scope for this workflow
 
 - Mutating beads. `update-summary` is read-only against `.beads/`.
-- Re-planning. Even if `bd ready` is empty and the graph is incomplete, `update-summary` does not create issues.
+- Re-planning. Even if no open tasks remain and the graph is incomplete, `update-summary` does not create issues.

From 5a0e8a6d151c976245bbce8b28d8c348ebae32ca Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Fri, 12 Jun 2026 16:31:52 -0700
Subject: [PATCH 4/6] research-step: v2 output contracts, compiled JSON Schema
 validation, hypothesis-driven flow

schemas.yaml v2: tasks are pure output contracts (key -> type maps), one
outcome verdict vocabulary, immutable adjudication records, A2A 1.0
artifact/part types, config block, and the hypothesis_driven_research flow.
Ships the compiled assets (per-task JSON Schemas, flows.json, flow diagrams);
validate-output.sh deep-validates against them. New next-task.sh (single
ordering definition) and task-output-keys.sh (single schema reader);
bd list --limit 0 throughout; close-task.sh never closes the epic root.
Workflows updated to match; execute.md adds report conventions.
---
 .../skills/research-step/SKILL.md             |    8 +-
 .../assets/compiled/adjudicate.schema.json    |  144 +
 .../assets/compiled/analysis.schema.json      |  119 +
 .../assets/compiled/audit.schema.json         |  127 +
 .../assets/compiled/auto_discovery.mmd        |   18 +
 .../compiled/cohort_assembly.schema.json      |  206 +
 .../compiled/data_acquisition.schema.json     |  161 +
 ..._literature_grounded_theory_generation.mmd |   92 +
 .../data_driven_discovery.schema.json         |  152 +
 .../assets/compiled/data_provenance.mmd       |   16 +
 .../assets/compiled/discovery_run.schema.json |  170 +
 .../compiled/discovery_synthesis.schema.json  |  271 +
 .../compiled/evidence_extraction.schema.json  |  132 +
 .../compiled/evidence_gathering.schema.json   |  121 +
 .../compiled/experiment_design.schema.json    |  162 +
 .../compiled/final_synthesis.schema.json      |  289 +
 .../research-step/assets/compiled/flows.json  | 6657 +++++++++++++++++
 .../assets/compiled/gap_synthesis.schema.json |  221 +
 .../compiled/holdout_replication.schema.json  |  167 +
 .../compiled/hypothesis_driven_research.mmd   |   29 +
 .../compiled/hypothesis_formation.schema.json |  126 +
 .../compiled/hypothesis_synthesis.schema.json |  224 +
 .../compiled/law_extraction.schema.json       |  139 +
 .../compiled/literature_review.schema.json    |  150 +
 .../compiled/novelty_assessment.schema.json   |  147 +
 .../provenance_extraction.schema.json         |  163 +
 .../compiled/provenance_search.schema.json    |  107 +
 .../compiled/provenance_synthesis.schema.json |  230 +
 .../assets/compiled/reproduction.mmd          |   29 +
 .../reproduction_synthesis.schema.json        |  253 +
 .../compiled/testability_triage.schema.json   |  144 +
 .../assets/compiled/theorizer.mmd             |   27 +
 .../compiled/theory_formation.schema.json     |  240 +
 .../compiled/theory_synthesis.schema.json     |  280 +
 .../verification_synthesis.schema.json        |  232 +
 .../skills/research-step/assets/schemas.yaml  |  448 +-
 .../research-step/scripts/close-task.sh       |   19 +-
 .../research-step/scripts/create-task.sh      |   15 +-
 .../skills/research-step/scripts/epic-root.sh |    2 +-
 .../skills/research-step/scripts/next-task.sh |   34 +
 .../research-step/scripts/summary-check.sh    |    2 +-
 .../research-step/scripts/task-output-keys.sh |   37 +
 .../research-step/scripts/validate-output.sh  |   70 +-
 .../research-step/workflows/brainstorm.md     |   16 +-
 .../skills/research-step/workflows/execute.md |   32 +-
 .../skills/research-step/workflows/init.md    |   10 +-
 .../skills/research-step/workflows/plan.md    |   36 +-
 .../research-step/workflows/update-summary.md |   46 +-
 plugins/asta/skills/research-step/SKILL.md    |    8 +-
 .../assets/compiled/adjudicate.schema.json    |  144 +
 .../assets/compiled/analysis.schema.json      |  119 +
 .../assets/compiled/audit.schema.json         |  127 +
 .../assets/compiled/auto_discovery.mmd        |   18 +
 .../compiled/cohort_assembly.schema.json      |  206 +
 .../compiled/data_acquisition.schema.json     |  161 +
 ..._literature_grounded_theory_generation.mmd |   92 +
 .../data_driven_discovery.schema.json         |  152 +
 .../assets/compiled/data_provenance.mmd       |   16 +
 .../assets/compiled/discovery_run.schema.json |  170 +
 .../compiled/discovery_synthesis.schema.json  |  271 +
 .../compiled/evidence_extraction.schema.json  |  132 +
 .../compiled/evidence_gathering.schema.json   |  121 +
 .../compiled/experiment_design.schema.json    |  162 +
 .../compiled/final_synthesis.schema.json      |  289 +
 .../research-step/assets/compiled/flows.json  | 6657 +++++++++++++++++
 .../assets/compiled/gap_synthesis.schema.json |  221 +
 .../compiled/holdout_replication.schema.json  |  167 +
 .../compiled/hypothesis_driven_research.mmd   |   29 +
 .../compiled/hypothesis_formation.schema.json |  126 +
 .../compiled/hypothesis_synthesis.schema.json |  224 +
 .../compiled/law_extraction.schema.json       |  139 +
 .../compiled/literature_review.schema.json    |  150 +
 .../compiled/novelty_assessment.schema.json   |  147 +
 .../provenance_extraction.schema.json         |  163 +
 .../compiled/provenance_search.schema.json    |  107 +
 .../compiled/provenance_synthesis.schema.json |  230 +
 .../assets/compiled/reproduction.mmd          |   29 +
 .../reproduction_synthesis.schema.json        |  253 +
 .../compiled/testability_triage.schema.json   |  144 +
 .../assets/compiled/theorizer.mmd             |   27 +
 .../compiled/theory_formation.schema.json     |  240 +
 .../compiled/theory_synthesis.schema.json     |  280 +
 .../verification_synthesis.schema.json        |  232 +
 .../skills/research-step/assets/schemas.yaml  |  448 +-
 .../research-step/scripts/close-task.sh       |   19 +-
 .../research-step/scripts/create-task.sh      |   15 +-
 .../skills/research-step/scripts/epic-root.sh |    2 +-
 .../skills/research-step/scripts/next-task.sh |   34 +
 .../research-step/scripts/summary-check.sh    |    2 +-
 .../research-step/scripts/task-output-keys.sh |   37 +
 .../research-step/scripts/validate-output.sh  |   70 +-
 .../research-step/workflows/brainstorm.md     |   16 +-
 .../skills/research-step/workflows/execute.md |   32 +-
 .../skills/research-step/workflows/init.md    |   10 +-
 .../skills/research-step/workflows/plan.md    |   36 +-
 .../research-step/workflows/update-summary.md |   46 +-
 skills/research-step/SKILL.md                 |    8 +-
 .../assets/compiled/adjudicate.schema.json    |  144 +
 .../assets/compiled/analysis.schema.json      |  119 +
 .../assets/compiled/audit.schema.json         |  127 +
 .../assets/compiled/auto_discovery.mmd        |   18 +
 .../compiled/cohort_assembly.schema.json      |  206 +
 .../compiled/data_acquisition.schema.json     |  161 +
 ..._literature_grounded_theory_generation.mmd |   92 +
 .../data_driven_discovery.schema.json         |  152 +
 .../assets/compiled/data_provenance.mmd       |   16 +
 .../assets/compiled/discovery_run.schema.json |  170 +
 .../compiled/discovery_synthesis.schema.json  |  271 +
 .../compiled/evidence_extraction.schema.json  |  132 +
 .../compiled/evidence_gathering.schema.json   |  121 +
 .../compiled/experiment_design.schema.json    |  162 +
 .../compiled/final_synthesis.schema.json      |  289 +
 .../research-step/assets/compiled/flows.json  | 6657 +++++++++++++++++
 .../assets/compiled/gap_synthesis.schema.json |  221 +
 .../compiled/holdout_replication.schema.json  |  167 +
 .../compiled/hypothesis_driven_research.mmd   |   29 +
 .../compiled/hypothesis_formation.schema.json |  126 +
 .../compiled/hypothesis_synthesis.schema.json |  224 +
 .../compiled/law_extraction.schema.json       |  139 +
 .../compiled/literature_review.schema.json    |  150 +
 .../compiled/novelty_assessment.schema.json   |  147 +
 .../provenance_extraction.schema.json         |  163 +
 .../compiled/provenance_search.schema.json    |  107 +
 .../compiled/provenance_synthesis.schema.json |  230 +
 .../assets/compiled/reproduction.mmd          |   29 +
 .../reproduction_synthesis.schema.json        |  253 +
 .../compiled/testability_triage.schema.json   |  144 +
 .../assets/compiled/theorizer.mmd             |   27 +
 .../compiled/theory_formation.schema.json     |  240 +
 .../compiled/theory_synthesis.schema.json     |  280 +
 .../verification_synthesis.schema.json        |  232 +
 skills/research-step/assets/schemas.yaml      |  448 +-
 skills/research-step/scripts/close-task.sh    |   19 +-
 skills/research-step/scripts/create-task.sh   |   15 +-
 skills/research-step/scripts/epic-root.sh     |    2 +-
 skills/research-step/scripts/next-task.sh     |   34 +
 skills/research-step/scripts/summary-check.sh |    2 +-
 .../research-step/scripts/task-output-keys.sh |   37 +
 .../research-step/scripts/validate-output.sh  |   70 +-
 skills/research-step/workflows/brainstorm.md  |   16 +-
 skills/research-step/workflows/execute.md     |   32 +-
 skills/research-step/workflows/init.md        |   10 +-
 skills/research-step/workflows/plan.md        |   36 +-
 .../research-step/workflows/update-summary.md |   46 +-
 144 files changed, 36864 insertions(+), 696 deletions(-)
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/flows.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
 create mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
 create mode 100755 plugins/asta-preview/skills/research-step/scripts/next-task.sh
 create mode 100755 plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/audit.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/flows.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
 create mode 100644 plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
 create mode 100755 plugins/asta/skills/research-step/scripts/next-task.sh
 create mode 100755 plugins/asta/skills/research-step/scripts/task-output-keys.sh
 create mode 100644 skills/research-step/assets/compiled/adjudicate.schema.json
 create mode 100644 skills/research-step/assets/compiled/analysis.schema.json
 create mode 100644 skills/research-step/assets/compiled/audit.schema.json
 create mode 100644 skills/research-step/assets/compiled/auto_discovery.mmd
 create mode 100644 skills/research-step/assets/compiled/cohort_assembly.schema.json
 create mode 100644 skills/research-step/assets/compiled/data_acquisition.schema.json
 create mode 100644 skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 create mode 100644 skills/research-step/assets/compiled/data_driven_discovery.schema.json
 create mode 100644 skills/research-step/assets/compiled/data_provenance.mmd
 create mode 100644 skills/research-step/assets/compiled/discovery_run.schema.json
 create mode 100644 skills/research-step/assets/compiled/discovery_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/evidence_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/evidence_gathering.schema.json
 create mode 100644 skills/research-step/assets/compiled/experiment_design.schema.json
 create mode 100644 skills/research-step/assets/compiled/final_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/flows.json
 create mode 100644 skills/research-step/assets/compiled/gap_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/holdout_replication.schema.json
 create mode 100644 skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 create mode 100644 skills/research-step/assets/compiled/hypothesis_formation.schema.json
 create mode 100644 skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/law_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/literature_review.schema.json
 create mode 100644 skills/research-step/assets/compiled/novelty_assessment.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_search.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/reproduction.mmd
 create mode 100644 skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/testability_triage.schema.json
 create mode 100644 skills/research-step/assets/compiled/theorizer.mmd
 create mode 100644 skills/research-step/assets/compiled/theory_formation.schema.json
 create mode 100644 skills/research-step/assets/compiled/theory_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/verification_synthesis.schema.json
 create mode 100755 skills/research-step/scripts/next-task.sh
 create mode 100755 skills/research-step/scripts/task-output-keys.sh

diff --git a/plugins/asta-preview/skills/research-step/SKILL.md b/plugins/asta-preview/skills/research-step/SKILL.md
index 49a7fec..e9f9a8c 100644
--- a/plugins/asta-preview/skills/research-step/SKILL.md
+++ b/plugins/asta-preview/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Bash(asta:*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically a separate epic kicked off after a theory-generation run), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types`, the `tasks` (typed `input`/`output` + a common `artifacts`), and the `flows` (each step carrying its `mission` + asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
+Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `hypothesis_driven_research` flow (literature → falsifiable hypotheses → one prespecified test per hypothesis), the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically kicked off after a theory-generation run, and run as its own session in a **separate workspace** with its own `mission.md` and `.beads/`, because a second epic root in the same workspace breaks `scripts/epic-root.sh`), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types` (immutable records — verdicts are `adjudication` records referencing their subject), the `tasks` (pure output contracts mapping each output key to its type), and the `flows` (each step carrying its `mission`, its `input` steps, and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -23,7 +23,7 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 | `mission.md` | Input. The research task. |
 | `.beads/` | Source of truth for state. |
 | `summary.md` | Derived view of the session, regenerated by **update-summary**. Beads is the source of truth; this file is just a digest for humans and for **brainstorm**. Frontmatter `beads_snapshot` records the state it was rendered from. |
-| `background_knowledge.txt` | Optional. Long-form context referenced from issue metadata via `summary_path`. |
+| `.asta/<agent>/<slug>/` | Heavy artifacts (raw agent JSON, datasets, reports), referenced from `output_json` by repo-root-relative `_path` fields. |
 
 ## Workflows
 
@@ -51,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off rule in the last step of `execute.md`); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
new file mode 100644
index 0000000..ccfb9d1
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/adjudicate.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "artifacts"
+  ],
+  "title": "adjudicate",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
new file mode 100644
index 0000000..55e557d
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
@@ -0,0 +1,119 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "analysis": {
+      "additionalProperties": true,
+      "properties": {
+        "assumptions": {
+          "type": "string"
+        },
+        "code": {
+          "type": "string"
+        },
+        "final_answer": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "final_answer",
+        "assumptions",
+        "code"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/analysis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "analysis": {
+      "$ref": "#/$defs/analysis"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "analysis",
+    "figures",
+    "artifacts"
+  ],
+  "title": "analysis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
new file mode 100644
index 0000000..ca21120
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
@@ -0,0 +1,127 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "audit_report": {
+      "additionalProperties": true,
+      "properties": {
+        "artifacts_found": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "challenges": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "check": {
+                "type": "string"
+              },
+              "concern": {
+                "type": "string"
+              },
+              "outcome": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "concern",
+              "check",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "recommended_adjustment": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "verdict_survives": {
+          "type": "boolean"
+        }
+      },
+      "required": [
+        "subject_id",
+        "challenges",
+        "artifacts_found",
+        "verdict_survives",
+        "recommended_adjustment"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/audit.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "audit_report": {
+      "$ref": "#/$defs/audit_report"
+    }
+  },
+  "required": [
+    "audit_report",
+    "artifacts"
+  ],
+  "title": "audit",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
new file mode 100644
index 0000000..14cd992
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
@@ -0,0 +1,18 @@
+%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
+  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
+  subgraph replication["replication (at replan)"]
+    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
+  end
+  class replication replan
+  discovery_synthesis["discovery_synthesis"]
+  cohort_assembly --> discovery_run
+  discovery_run --> replication__holdout_replication
+  cohort_assembly --> replication__holdout_replication
+  discovery_run --> discovery_synthesis
+  replication --> discovery_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
new file mode 100644
index 0000000..4866540
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
@@ -0,0 +1,206 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "cohort": {
+      "additionalProperties": true,
+      "properties": {
+        "discovery_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "exclusion_criteria": {
+          "type": "string"
+        },
+        "holdout_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "id": {
+          "type": "string"
+        },
+        "inclusion_criteria": {
+          "type": "string"
+        },
+        "research_question": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source_data_sources": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "research_question",
+        "inclusion_criteria",
+        "exclusion_criteria",
+        "sampling",
+        "source_data_sources",
+        "discovery_subset",
+        "holdout_subset",
+        "run_id"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/cohort_assembly.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "cohort": {
+      "$ref": "#/$defs/cohort"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "cohort",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "cohort_assembly",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
new file mode 100644
index 0000000..0bec23c
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
@@ -0,0 +1,161 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "acquisition": {
+      "additionalProperties": true,
+      "properties": {
+        "access_status": {
+          "enum": [
+            "acquired",
+            "open_unfetched",
+            "restricted",
+            "not_found"
+          ]
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "dataset_id": {
+          "type": "string"
+        },
+        "local_path": {
+          "type": "string"
+        },
+        "validation_note": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "access_status",
+        "local_path",
+        "dataset_id",
+        "validation_note"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_acquisition.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "acquisitions": {
+      "items": {
+        "$ref": "#/$defs/acquisition"
+      },
+      "type": "array"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "acquisitions",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_acquisition",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
new file mode 100644
index 0000000..cb56eed
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
@@ -0,0 +1,92 @@
+%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  subgraph data_provenance["data_provenance [flow: data_provenance]"]
+    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    data_provenance__provenance_synthesis["provenance_synthesis"]
+  end
+  class data_provenance embed
+  subgraph reproduction["reproduction [flow: reproduction]"]
+    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+    reproduction__law_extraction["law_extraction"]
+    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+    subgraph reproduction__replication["replication (at replan)"]
+      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
+      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__adjudicate["adjudicate"]
+    end
+    class reproduction__replication replan
+    reproduction__reproduction_synthesis["reproduction_synthesis"]
+  end
+  class reproduction embed
+  subgraph theorizer["theorizer [flow: theorizer]"]
+    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    subgraph theorizer__theory_generation["theory_generation"]
+      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+    end
+    theorizer__testability_triage["testability_triage"]
+    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+    theorizer__theory_synthesis["theory_synthesis"]
+  end
+  class theorizer embed
+  subgraph verification["verification (at replan)"]
+    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__adjudicate["adjudicate"]
+  end
+  class verification replan
+  verification_synthesis["verification_synthesis"]
+  gap_synthesis["gap_synthesis"]
+  final_synthesis["final_synthesis"]
+  data_provenance__provenance_search --> data_provenance__provenance_extraction
+  data_provenance__provenance_search --> data_provenance__data_acquisition
+  data_provenance__provenance_extraction --> data_provenance__data_acquisition
+  data_provenance__provenance_search --> data_provenance__provenance_synthesis
+  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
+  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
+  reproduction__data_driven_discovery --> reproduction__law_extraction
+  reproduction__law_extraction --> reproduction__evidence_gathering
+  reproduction__law_extraction --> reproduction__replication__experiment_design
+  reproduction__evidence_gathering --> reproduction__replication__experiment_design
+  reproduction__replication__experiment_design --> reproduction__replication__analysis
+  reproduction__evidence_gathering --> reproduction__replication__analysis
+  reproduction__replication__analysis --> reproduction__replication__audit
+  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
+  reproduction__replication__analysis --> reproduction__replication__adjudicate
+  reproduction__replication__audit --> reproduction__replication__adjudicate
+  reproduction__law_extraction --> reproduction__reproduction_synthesis
+  reproduction__replication --> reproduction__reproduction_synthesis
+  reproduction__law_extraction --> theorizer__evidence_extraction
+  reproduction__replication__adjudicate --> theorizer__evidence_extraction
+  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
+  theorizer__theory_generation --> theorizer__testability_triage
+  reproduction__data_driven_discovery --> theorizer__testability_triage
+  reproduction__evidence_gathering --> theorizer__testability_triage
+  theorizer__testability_triage --> theorizer__novelty_assessment
+  theorizer__theory_generation --> theorizer__theory_synthesis
+  theorizer__novelty_assessment --> theorizer__theory_synthesis
+  theorizer__testability_triage --> theorizer__theory_synthesis
+  theorizer__testability_triage --> verification__analysis
+  reproduction__data_driven_discovery --> verification__analysis
+  reproduction__evidence_gathering --> verification__analysis
+  verification__analysis --> verification__audit
+  theorizer__testability_triage --> verification__adjudicate
+  verification__analysis --> verification__adjudicate
+  verification__audit --> verification__adjudicate
+  verification --> verification_synthesis
+  theorizer__novelty_assessment --> verification_synthesis
+  data_provenance__provenance_synthesis --> gap_synthesis
+  reproduction__reproduction_synthesis --> gap_synthesis
+  theorizer__theory_synthesis --> gap_synthesis
+  verification_synthesis --> gap_synthesis
+  data_provenance__provenance_synthesis --> final_synthesis
+  reproduction__reproduction_synthesis --> final_synthesis
+  theorizer__theory_synthesis --> final_synthesis
+  verification_synthesis --> final_synthesis
+  gap_synthesis --> final_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
new file mode 100644
index 0000000..14f65a7
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
@@ -0,0 +1,152 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_driven_discovery.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_driven_discovery",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
new file mode 100644
index 0000000..3b46977
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
@@ -0,0 +1,16 @@
+%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+  provenance_synthesis["provenance_synthesis"]
+  provenance_search --> provenance_extraction
+  provenance_search --> data_acquisition
+  provenance_extraction --> data_acquisition
+  provenance_search --> provenance_synthesis
+  provenance_extraction --> provenance_synthesis
+  data_acquisition --> provenance_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
new file mode 100644
index 0000000..b7ac259
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
@@ -0,0 +1,170 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_run.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "discovery_run",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
new file mode 100644
index 0000000..29cb31f
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
@@ -0,0 +1,271 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "discovery_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "interpretation": {
+          "type": "string"
+        },
+        "laws": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "deciding_experiment": {
+                "type": "string"
+              },
+              "effect_size_discovery": {
+                "type": "string"
+              },
+              "effect_size_holdout": {
+                "type": "string"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "surprise": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "surprise",
+              "outcome",
+              "deciding_experiment",
+              "effect_size_discovery",
+              "effect_size_holdout"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "run_id",
+        "laws",
+        "interpretation",
+        "next_steps",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "discovery_report": {
+      "$ref": "#/$defs/discovery_report"
+    }
+  },
+  "required": [
+    "discovery_report",
+    "artifacts"
+  ],
+  "title": "discovery_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
new file mode 100644
index 0000000..7a53a5b
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
@@ -0,0 +1,132 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "artifacts"
+  ],
+  "title": "evidence_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
new file mode 100644
index 0000000..c310796
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
@@ -0,0 +1,121 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_gathering.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "datasets",
+    "artifacts"
+  ],
+  "title": "evidence_gathering",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
new file mode 100644
index 0000000..458fe42
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
@@ -0,0 +1,162 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "experiment_design": {
+      "additionalProperties": true,
+      "properties": {
+        "construct_equivalence": {
+          "enum": [
+            "equivalent",
+            "proxy",
+            "mismatch"
+          ]
+        },
+        "data_gap": {
+          "type": "string"
+        },
+        "experiment_design_query": {
+          "type": "string"
+        },
+        "experiment_name": {
+          "type": "string"
+        },
+        "feasibility": {
+          "enum": [
+            "feasible",
+            "proxy_only",
+            "data_unavailable",
+            "construct_mismatch"
+          ]
+        },
+        "independent_operationalization": {
+          "type": "string"
+        },
+        "plain_language_description": {
+          "type": "string"
+        },
+        "prespecified": {
+          "additionalProperties": true,
+          "properties": {
+            "metric": {
+              "type": "string"
+            },
+            "success_threshold": {
+              "type": "string"
+            },
+            "test": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "test",
+            "metric",
+            "success_threshold"
+          ],
+          "type": "object"
+        },
+        "required_data": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "experiment_name",
+        "plain_language_description",
+        "source_operationalization",
+        "independent_operationalization",
+        "construct_equivalence",
+        "feasibility",
+        "required_data",
+        "data_gap",
+        "experiment_design_query",
+        "prespecified"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/experiment_design.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "experiment_design": {
+      "$ref": "#/$defs/experiment_design"
+    }
+  },
+  "required": [
+    "experiment_design",
+    "artifacts"
+  ],
+  "title": "experiment_design",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
new file mode 100644
index 0000000..b00f085
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
@@ -0,0 +1,289 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "research_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "inference_chain": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "chain": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "claim": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "claim",
+              "chain"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sub_reports": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "report_path",
+              "one_line"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "tensions_and_surprises": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "evidence": {
+                "type": "string"
+              },
+              "observation": {
+                "type": "string"
+              },
+              "where": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "observation",
+              "where",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_highlights": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "claim": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_was_done": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theory_highlights",
+        "inference_chain",
+        "what_was_done",
+        "sub_reports",
+        "tensions_and_surprises",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/final_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "research_report": {
+      "$ref": "#/$defs/research_report"
+    }
+  },
+  "required": [
+    "research_report",
+    "artifacts"
+  ],
+  "title": "final_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/flows.json b/plugins/asta-preview/skills/research-step/assets/compiled/flows.json
new file mode 100644
index 0000000..907a432
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/flows.json
@@ -0,0 +1,6657 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "flows": {
+    "auto_discovery": {
+      "edges": [
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "discovery_run"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "discovery_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "discovery_synthesis"
+        }
+      ],
+      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta documents",
+            "asta generate-theories find-and-extract",
+            "asta autodiscovery create",
+            "asta autodiscovery upload",
+            "asta autodiscovery metadata"
+          ],
+          "id": "cohort_assembly",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
+          "name": "cohort_assembly",
+          "parent": null,
+          "replan": false,
+          "task": "cohort_assembly"
+        },
+        {
+          "chain": [
+            "asta autodiscovery submit",
+            "asta autodiscovery experiments"
+          ],
+          "id": "discovery_run",
+          "inputs": [
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
+          "name": "discovery_run",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_run"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__holdout_replication",
+          "inputs": [
+            "discovery_run",
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "holdout_replication",
+          "parent": "replication",
+          "replan": false,
+          "task": "holdout_replication"
+        },
+        {
+          "chain": [],
+          "id": "discovery_synthesis",
+          "inputs": [
+            "discovery_run",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
+          "name": "discovery_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_synthesis"
+        }
+      ]
+    },
+    "data_and_literature_grounded_theory_generation": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_provenance__data_acquisition",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "reproduction__law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "reproduction__replication__audit",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "reproduction__replication",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "adjudicate",
+          "source": "reproduction__replication__adjudicate",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "theorizer__evidence_extraction",
+          "target": "theorizer__theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__audit"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "verification__audit",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "verification",
+          "source": "verification",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "gap_synthesis",
+          "source": "gap_synthesis",
+          "target": "final_synthesis"
+        }
+      ],
+      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
+      "nodes": [
+        {
+          "id": "data_provenance",
+          "kind": "embed",
+          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
+          "name": "data_provenance",
+          "parent": null,
+          "replan": false,
+          "workflow": "data_provenance"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "data_provenance__provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "data_provenance__provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_provenance__data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "data_provenance__provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_synthesis"
+        },
+        {
+          "id": "reproduction",
+          "kind": "embed",
+          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
+          "name": "reproduction",
+          "parent": null,
+          "replan": false,
+          "workflow": "reproduction"
+        },
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "reproduction__data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "reproduction__evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "reproduction__replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": "reproduction",
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "reproduction__replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "reproduction_synthesis"
+        },
+        {
+          "id": "theorizer",
+          "kind": "embed",
+          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
+          "name": "theorizer",
+          "parent": null,
+          "replan": false,
+          "workflow": "theorizer"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "theorizer__evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theorizer__theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": "theorizer",
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theorizer__theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theorizer__theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "theorizer__novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "verification",
+          "kind": "group",
+          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
+          "name": "verification",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__analysis",
+          "inputs": [
+            "testability_triage",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "verification",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "verification",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "verification__adjudicate",
+          "inputs": [
+            "testability_triage",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
+          "name": "adjudicate",
+          "parent": "verification",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "verification_synthesis",
+          "inputs": [
+            "verification",
+            "novelty_assessment"
+          ],
+          "kind": "step",
+          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether it survived the audit), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
+          "name": "verification_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "verification_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "gap_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
+          "name": "gap_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "gap_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "final_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis",
+            "gap_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
+          "name": "final_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "final_synthesis"
+        }
+      ]
+    },
+    "data_provenance": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_acquisition",
+          "target": "provenance_synthesis"
+        }
+      ],
+      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": null,
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_synthesis"
+        }
+      ]
+    },
+    "hypothesis_driven_research": {
+      "edges": [
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "hypothesis_formation"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "testing__data_acquisition",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "testing__audit",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "hypothesis_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testing",
+          "source": "testing",
+          "target": "hypothesis_synthesis"
+        }
+      ],
+      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "literature_review",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
+          "name": "literature_review",
+          "parent": null,
+          "replan": false,
+          "task": "literature_review"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "hypothesis_formation",
+          "inputs": [
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
+          "name": "hypothesis_formation",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_formation"
+        },
+        {
+          "id": "testing",
+          "kind": "group",
+          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
+          "name": "testing",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "testing__experiment_design",
+          "inputs": [
+            "hypothesis_formation",
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "testing",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "testing__data_acquisition",
+          "inputs": [
+            "experiment_design"
+          ],
+          "kind": "step",
+          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
+          "name": "data_acquisition",
+          "parent": "testing",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__analysis",
+          "inputs": [
+            "experiment_design",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "testing",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "testing",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "testing__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
+          "name": "adjudicate",
+          "parent": "testing",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "hypothesis_synthesis",
+          "inputs": [
+            "hypothesis_formation",
+            "testing"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
+          "name": "hypothesis_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_synthesis"
+        }
+      ]
+    },
+    "reproduction": {
+      "edges": [
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "data_driven_discovery",
+          "target": "law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "replication__audit",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "reproduction_synthesis"
+        }
+      ],
+      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held, partial, failed, underpowered, or n/a) crossed with testability (tested, proxy_only, or untestable) - and comes from the branch's adjudication, not the ingested run.",
+      "nodes": [
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "reproduction_synthesis"
+        }
+      ]
+    },
+    "theorizer": {
+      "edges": [
+        {
+          "external": true,
+          "input": "law_extraction",
+          "source": "ext__law_extraction",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": true,
+          "input": "adjudicate",
+          "source": "ext__adjudicate",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "evidence_extraction",
+          "target": "theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "data_driven_discovery",
+          "source": "ext__data_driven_discovery",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "evidence_gathering",
+          "source": "ext__evidence_gathering",
+          "target": "testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "novelty_assessment",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "theory_synthesis"
+        }
+      ],
+      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data in hand can actually test.",
+      "nodes": [
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": null,
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": null,
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": null,
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "ext__adjudicate",
+          "kind": "external",
+          "mission": "",
+          "name": "adjudicate",
+          "parent": null,
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "id": "ext__data_driven_discovery",
+          "kind": "external",
+          "mission": "",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "id": "ext__evidence_gathering",
+          "kind": "external",
+          "mission": "",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "ext__law_extraction",
+          "kind": "external",
+          "mission": "",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        }
+      ]
+    }
+  },
+  "format_version": 1,
+  "schema_version": 2,
+  "tasks": {
+    "adjudicate": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/adjudicate.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "artifacts"
+        ],
+        "title": "adjudicate",
+        "type": "object"
+      }
+    },
+    "analysis": {
+      "output": {
+        "analysis": "analysis",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "analysis": {
+            "additionalProperties": true,
+            "properties": {
+              "assumptions": {
+                "type": "string"
+              },
+              "code": {
+                "type": "string"
+              },
+              "final_answer": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "final_answer",
+              "assumptions",
+              "code"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/analysis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "analysis": {
+            "$ref": "#/$defs/analysis"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "analysis",
+          "figures",
+          "artifacts"
+        ],
+        "title": "analysis",
+        "type": "object"
+      }
+    },
+    "audit": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "audit_report": "audit_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "audit_report": {
+            "additionalProperties": true,
+            "properties": {
+              "artifacts_found": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "challenges": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "check": {
+                      "type": "string"
+                    },
+                    "concern": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "concern",
+                    "check",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "recommended_adjustment": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "verdict_survives": {
+                "type": "boolean"
+              }
+            },
+            "required": [
+              "subject_id",
+              "challenges",
+              "artifacts_found",
+              "verdict_survives",
+              "recommended_adjustment"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/audit.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "audit_report": {
+            "$ref": "#/$defs/audit_report"
+          }
+        },
+        "required": [
+          "audit_report",
+          "artifacts"
+        ],
+        "title": "audit",
+        "type": "object"
+      }
+    },
+    "cohort_assembly": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "cohort": "cohort",
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "cohort": {
+            "additionalProperties": true,
+            "properties": {
+              "discovery_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "exclusion_criteria": {
+                "type": "string"
+              },
+              "holdout_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "id": {
+                "type": "string"
+              },
+              "inclusion_criteria": {
+                "type": "string"
+              },
+              "research_question": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source_data_sources": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "research_question",
+              "inclusion_criteria",
+              "exclusion_criteria",
+              "sampling",
+              "source_data_sources",
+              "discovery_subset",
+              "holdout_subset",
+              "run_id"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/cohort_assembly.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "cohort": {
+            "$ref": "#/$defs/cohort"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "cohort",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "cohort_assembly",
+        "type": "object"
+      }
+    },
+    "data_acquisition": {
+      "output": {
+        "acquisitions": [
+          "acquisition"
+        ],
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "acquisition": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "validation_note": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "access_status",
+              "local_path",
+              "dataset_id",
+              "validation_note"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_acquisition.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "acquisitions": {
+            "items": {
+              "$ref": "#/$defs/acquisition"
+            },
+            "type": "array"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "acquisitions",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_acquisition",
+        "type": "object"
+      }
+    },
+    "data_driven_discovery": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_driven_discovery.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_driven_discovery",
+        "type": "object"
+      }
+    },
+    "discovery_run": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_run.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "discovery_run",
+        "type": "object"
+      }
+    },
+    "discovery_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "discovery_report": "discovery_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "discovery_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "interpretation": {
+                "type": "string"
+              },
+              "laws": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "deciding_experiment": {
+                      "type": "string"
+                    },
+                    "effect_size_discovery": {
+                      "type": "string"
+                    },
+                    "effect_size_holdout": {
+                      "type": "string"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "surprise": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "surprise",
+                    "outcome",
+                    "deciding_experiment",
+                    "effect_size_discovery",
+                    "effect_size_holdout"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "run_id",
+              "laws",
+              "interpretation",
+              "next_steps",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "discovery_report": {
+            "$ref": "#/$defs/discovery_report"
+          }
+        },
+        "required": [
+          "discovery_report",
+          "artifacts"
+        ],
+        "title": "discovery_synthesis",
+        "type": "object"
+      }
+    },
+    "evidence_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "artifacts"
+        ],
+        "title": "evidence_extraction",
+        "type": "object"
+      }
+    },
+    "evidence_gathering": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_gathering.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "datasets",
+          "artifacts"
+        ],
+        "title": "evidence_gathering",
+        "type": "object"
+      }
+    },
+    "experiment_design": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "experiment_design": "experiment_design"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "experiment_design": {
+            "additionalProperties": true,
+            "properties": {
+              "construct_equivalence": {
+                "enum": [
+                  "equivalent",
+                  "proxy",
+                  "mismatch"
+                ]
+              },
+              "data_gap": {
+                "type": "string"
+              },
+              "experiment_design_query": {
+                "type": "string"
+              },
+              "experiment_name": {
+                "type": "string"
+              },
+              "feasibility": {
+                "enum": [
+                  "feasible",
+                  "proxy_only",
+                  "data_unavailable",
+                  "construct_mismatch"
+                ]
+              },
+              "independent_operationalization": {
+                "type": "string"
+              },
+              "plain_language_description": {
+                "type": "string"
+              },
+              "prespecified": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "experiment_name",
+              "plain_language_description",
+              "source_operationalization",
+              "independent_operationalization",
+              "construct_equivalence",
+              "feasibility",
+              "required_data",
+              "data_gap",
+              "experiment_design_query",
+              "prespecified"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/experiment_design.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "experiment_design": {
+            "$ref": "#/$defs/experiment_design"
+          }
+        },
+        "required": [
+          "experiment_design",
+          "artifacts"
+        ],
+        "title": "experiment_design",
+        "type": "object"
+      }
+    },
+    "final_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "research_report": "research_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "research_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "inference_chain": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "chain": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "claim": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "claim",
+                    "chain"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sub_reports": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "kind": {
+                      "type": "string"
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "report_path": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "kind",
+                    "report_path",
+                    "one_line"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "tensions_and_surprises": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "observation": {
+                      "type": "string"
+                    },
+                    "where": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "observation",
+                    "where",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_highlights": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "claim": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_was_done": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theory_highlights",
+              "inference_chain",
+              "what_was_done",
+              "sub_reports",
+              "tensions_and_surprises",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/final_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "research_report": {
+            "$ref": "#/$defs/research_report"
+          }
+        },
+        "required": [
+          "research_report",
+          "artifacts"
+        ],
+        "title": "final_synthesis",
+        "type": "object"
+      }
+    },
+    "gap_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_gaps_report": "data_gaps_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_gaps_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "arose_in": {
+                      "type": "string"
+                    },
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity",
+                    "arose_in"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "gaps",
+              "next_steps",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/gap_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_gaps_report": {
+            "$ref": "#/$defs/data_gaps_report"
+          }
+        },
+        "required": [
+          "data_gaps_report",
+          "artifacts"
+        ],
+        "title": "gap_synthesis",
+        "type": "object"
+      }
+    },
+    "holdout_replication": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/holdout_replication.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "figures",
+          "artifacts"
+        ],
+        "title": "holdout_replication",
+        "type": "object"
+      }
+    },
+    "hypothesis_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypotheses": [
+          "hypothesis"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "hypothesis": {
+            "additionalProperties": true,
+            "properties": {
+              "falsifiable_prediction": {
+                "type": "string"
+              },
+              "grounds": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "rationale": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "rationale",
+              "falsifiable_prediction",
+              "grounds"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypotheses": {
+            "items": {
+              "$ref": "#/$defs/hypothesis"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "hypotheses",
+          "artifacts"
+        ],
+        "title": "hypothesis_formation",
+        "type": "object"
+      }
+    },
+    "hypothesis_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypothesis_report": "hypothesis_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "hypothesis_report": {
+            "additionalProperties": true,
+            "properties": {
+              "answer": {
+                "type": "string"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "hypothesis_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "hypothesis_id",
+                    "statement",
+                    "outcome",
+                    "effect_size_observed",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_questions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "question": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "question",
+              "ledger",
+              "answer",
+              "open_questions",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypothesis_report": {
+            "$ref": "#/$defs/hypothesis_report"
+          }
+        },
+        "required": [
+          "hypothesis_report",
+          "artifacts"
+        ],
+        "title": "hypothesis_synthesis",
+        "type": "object"
+      }
+    },
+    "law_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/law_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "law_extraction",
+        "type": "object"
+      }
+    },
+    "literature_review": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "literature_review": "literature_review"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "literature_review": {
+            "additionalProperties": true,
+            "properties": {
+              "citations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "corpus_id": {
+                      "type": "number"
+                    },
+                    "id": {
+                      "type": "string"
+                    },
+                    "relevance": {
+                      "type": "string"
+                    },
+                    "title": {
+                      "type": "string"
+                    },
+                    "url": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "id",
+                    "corpus_id",
+                    "title",
+                    "url",
+                    "relevance"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "key_findings": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_gaps": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "summary": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "summary",
+              "key_findings",
+              "open_gaps",
+              "citations"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/literature_review.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "literature_review": {
+            "$ref": "#/$defs/literature_review"
+          }
+        },
+        "required": [
+          "literature_review",
+          "artifacts"
+        ],
+        "title": "literature_review",
+        "type": "object"
+      }
+    },
+    "novelty_assessment": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_evaluations": [
+          "theory_evaluation"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_evaluation": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "overall_support": {
+                "enum": [
+                  "supports",
+                  "mixed",
+                  "contradicts",
+                  "inconclusive"
+                ]
+              },
+              "overall_support_raw": {
+                "type": "string"
+              },
+              "statement_evaluations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "explanation": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "statement_index": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "statement_index",
+                    "novelty",
+                    "explanation"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "theory_id",
+              "novelty",
+              "overall_support",
+              "explanation",
+              "statement_evaluations"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/novelty_assessment.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_evaluations": {
+            "items": {
+              "$ref": "#/$defs/theory_evaluation"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theory_evaluations",
+          "artifacts"
+        ],
+        "title": "novelty_assessment",
+        "type": "object"
+      }
+    },
+    "provenance_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data",
+        "source_access": [
+          "source_access"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "source_access": {
+            "additionalProperties": true,
+            "properties": {
+              "data_availability": {
+                "type": "string"
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "identifier": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "data_availability",
+              "repository",
+              "identifier"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          },
+          "source_access": {
+            "items": {
+              "$ref": "#/$defs/source_access"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "source_access",
+          "artifacts"
+        ],
+        "title": "provenance_extraction",
+        "type": "object"
+      }
+    },
+    "provenance_search": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_sources": [
+          "data_source"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_source": {
+            "additionalProperties": true,
+            "properties": {
+              "dataset_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "dataset_id",
+              "paper_id",
+              "paper_title",
+              "paper_url"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_search.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_sources": {
+            "items": {
+              "$ref": "#/$defs/data_source"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "data_sources",
+          "artifacts"
+        ],
+        "title": "provenance_search",
+        "type": "object"
+      }
+    },
+    "provenance_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "provenance_report": "provenance_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "provenance_report": {
+            "additionalProperties": true,
+            "properties": {
+              "acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "not_acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sources": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "access_status": {
+                      "enum": [
+                        "acquired",
+                        "open_unfetched",
+                        "restricted",
+                        "not_found"
+                      ]
+                    },
+                    "dataset_id": {
+                      "type": "string"
+                    },
+                    "local_path": {
+                      "type": "string"
+                    },
+                    "paper_title": {
+                      "type": "string"
+                    },
+                    "paper_url": {
+                      "type": "string"
+                    },
+                    "repository": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "dataset_id",
+                    "paper_title",
+                    "paper_url",
+                    "repository",
+                    "access_status",
+                    "local_path"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "sources",
+              "method_note",
+              "acquired",
+              "not_acquired",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "provenance_report": {
+            "$ref": "#/$defs/provenance_report"
+          }
+        },
+        "required": [
+          "provenance_report",
+          "artifacts"
+        ],
+        "title": "provenance_synthesis",
+        "type": "object"
+      }
+    },
+    "reproduction_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "reproduction_report": "reproduction_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "reproduction_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "laws_ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "effect_size_source": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "independence_axes": {
+                      "items": {
+                        "enum": [
+                          "region",
+                          "instrument",
+                          "method",
+                          "construct",
+                          "temporal",
+                          "population"
+                        ]
+                      },
+                      "type": "array"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "testability": {
+                      "enum": [
+                        "tested",
+                        "proxy_only",
+                        "untestable"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "outcome",
+                    "testability",
+                    "effect_size_source",
+                    "effect_size_observed",
+                    "independence_axes",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_failed_or_untestable": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_held": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "method_note",
+              "laws_ledger",
+              "what_held",
+              "what_failed_or_untestable",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/reproduction_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "reproduction_report": {
+            "$ref": "#/$defs/reproduction_report"
+          }
+        },
+        "required": [
+          "reproduction_report",
+          "artifacts"
+        ],
+        "title": "reproduction_synthesis",
+        "type": "object"
+      }
+    },
+    "testability_triage": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "testability_triage": "testability_triage"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "testability_triage": {
+            "additionalProperties": true,
+            "properties": {
+              "assessments": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "available_data": {
+                      "type": "string"
+                    },
+                    "gap": {
+                      "type": "string"
+                    },
+                    "proposed_test": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "metric": {
+                          "type": "string"
+                        },
+                        "success_threshold": {
+                          "type": "string"
+                        },
+                        "test": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "test",
+                        "metric",
+                        "success_threshold"
+                      ],
+                      "type": "object"
+                    },
+                    "required_data": {
+                      "type": "string"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "testable_now",
+                    "available_data",
+                    "required_data",
+                    "proposed_test",
+                    "gap"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "testable_theory_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "assessments",
+              "testable_theory_ids"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/testability_triage.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "testability_triage": {
+            "$ref": "#/$defs/testability_triage"
+          }
+        },
+        "required": [
+          "testability_triage",
+          "artifacts"
+        ],
+        "title": "testability_triage",
+        "type": "object"
+      }
+    },
+    "theory_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theories": [
+          "theory"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory": {
+            "additionalProperties": true,
+            "properties": {
+              "components": {
+                "additionalProperties": true,
+                "properties": {
+                  "generation_objective": {
+                    "type": "string"
+                  },
+                  "new_predictions_likely": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "new_predictions_unknown": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statements": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "conflicting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "statement_name": {
+                          "type": "string"
+                        },
+                        "supporting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "theory_statement": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "statement_name",
+                        "theory_statement",
+                        "supporting_evidence",
+                        "conflicting_evidence"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "unaccounted_for": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "generation_objective",
+                  "theory_statements",
+                  "new_predictions_likely",
+                  "new_predictions_unknown",
+                  "unaccounted_for"
+                ],
+                "type": "object"
+              },
+              "description": {
+                "type": "string"
+              },
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "name": {
+                "type": "string"
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "theory_query": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "name",
+              "description",
+              "theory_query",
+              "objective",
+              "grounds_law_ids",
+              "supporting_evidence_ids",
+              "components"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theories": {
+            "items": {
+              "$ref": "#/$defs/theory"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theories",
+          "artifacts"
+        ],
+        "title": "theory_formation",
+        "type": "object"
+      }
+    },
+    "theory_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_report": "theory_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "new_predictions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "novelty_summary": {
+                "type": "string"
+              },
+              "open_threads": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "theories": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "grounds_law_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "name": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "objective": {
+                      "enum": [
+                        "accuracy_focused",
+                        "novelty_focused"
+                      ]
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "supporting_evidence_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "name",
+                    "objective",
+                    "one_line",
+                    "grounds_law_ids",
+                    "novelty",
+                    "testable_now",
+                    "supporting_evidence_ids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theories",
+              "novelty_summary",
+              "new_predictions",
+              "open_threads",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_report": {
+            "$ref": "#/$defs/theory_report"
+          }
+        },
+        "required": [
+          "theory_report",
+          "artifacts"
+        ],
+        "title": "theory_synthesis",
+        "type": "object"
+      }
+    },
+    "verification_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "verification_report": "verification_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "verification_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "novelty_by_verification": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "audit_survived": {
+                      "type": "boolean"
+                    },
+                    "claim": {
+                      "type": "string"
+                    },
+                    "data_used": {
+                      "type": "string"
+                    },
+                    "effect_size": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome",
+                    "effect_size",
+                    "data_used",
+                    "audit_survived"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_could_not_be_tested": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_was_tested": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "novelty_by_verification",
+              "what_was_tested",
+              "what_could_not_be_tested",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/verification_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "verification_report": {
+            "$ref": "#/$defs/verification_report"
+          }
+        },
+        "required": [
+          "verification_report",
+          "artifacts"
+        ],
+        "title": "verification_synthesis",
+        "type": "object"
+      }
+    }
+  }
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
new file mode 100644
index 0000000..760fbb5
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
@@ -0,0 +1,221 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_gaps_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "arose_in": {
+                "type": "string"
+              },
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity",
+              "arose_in"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "gaps",
+        "next_steps",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/gap_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_gaps_report": {
+      "$ref": "#/$defs/data_gaps_report"
+    }
+  },
+  "required": [
+    "data_gaps_report",
+    "artifacts"
+  ],
+  "title": "gap_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
new file mode 100644
index 0000000..9d18252
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
@@ -0,0 +1,167 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/holdout_replication.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "figures",
+    "artifacts"
+  ],
+  "title": "holdout_replication",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
new file mode 100644
index 0000000..e996ef7
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
@@ -0,0 +1,29 @@
+%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  literature_review["literature_review<br/>asta literature find · asta papers search"]
+  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph testing["testing (at replan)"]
+    testing__experiment_design["experiment_design<br/>asta experiment"]
+    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__adjudicate["adjudicate"]
+  end
+  class testing replan
+  hypothesis_synthesis["hypothesis_synthesis"]
+  literature_review --> hypothesis_formation
+  hypothesis_formation --> testing__experiment_design
+  literature_review --> testing__experiment_design
+  testing__experiment_design --> testing__data_acquisition
+  testing__experiment_design --> testing__analysis
+  testing__data_acquisition --> testing__analysis
+  testing__analysis --> testing__audit
+  testing__experiment_design --> testing__adjudicate
+  testing__analysis --> testing__adjudicate
+  testing__audit --> testing__adjudicate
+  hypothesis_formation --> hypothesis_synthesis
+  testing --> hypothesis_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
new file mode 100644
index 0000000..694d94f
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
@@ -0,0 +1,126 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "hypothesis": {
+      "additionalProperties": true,
+      "properties": {
+        "falsifiable_prediction": {
+          "type": "string"
+        },
+        "grounds": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "rationale": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "rationale",
+        "falsifiable_prediction",
+        "grounds"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypotheses": {
+      "items": {
+        "$ref": "#/$defs/hypothesis"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "hypotheses",
+    "artifacts"
+  ],
+  "title": "hypothesis_formation",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
new file mode 100644
index 0000000..b2fe767
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
@@ -0,0 +1,224 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "hypothesis_report": {
+      "additionalProperties": true,
+      "properties": {
+        "answer": {
+          "type": "string"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "hypothesis_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "hypothesis_id",
+              "statement",
+              "outcome",
+              "effect_size_observed",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_questions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "question": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "question",
+        "ledger",
+        "answer",
+        "open_questions",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypothesis_report": {
+      "$ref": "#/$defs/hypothesis_report"
+    }
+  },
+  "required": [
+    "hypothesis_report",
+    "artifacts"
+  ],
+  "title": "hypothesis_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
new file mode 100644
index 0000000..7b3e1fc
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
@@ -0,0 +1,139 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/law_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "law_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
new file mode 100644
index 0000000..14df7b7
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
@@ -0,0 +1,150 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "literature_review": {
+      "additionalProperties": true,
+      "properties": {
+        "citations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "corpus_id": {
+                "type": "number"
+              },
+              "id": {
+                "type": "string"
+              },
+              "relevance": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "corpus_id",
+              "title",
+              "url",
+              "relevance"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "key_findings": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_gaps": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "summary": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "summary",
+        "key_findings",
+        "open_gaps",
+        "citations"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/literature_review.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "literature_review": {
+      "$ref": "#/$defs/literature_review"
+    }
+  },
+  "required": [
+    "literature_review",
+    "artifacts"
+  ],
+  "title": "literature_review",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
new file mode 100644
index 0000000..729f9fe
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
@@ -0,0 +1,147 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_evaluation": {
+      "additionalProperties": true,
+      "properties": {
+        "explanation": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "novelty": {
+          "enum": [
+            "established",
+            "derivable",
+            "genuinely_new"
+          ]
+        },
+        "overall_support": {
+          "enum": [
+            "supports",
+            "mixed",
+            "contradicts",
+            "inconclusive"
+          ]
+        },
+        "overall_support_raw": {
+          "type": "string"
+        },
+        "statement_evaluations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "statement_index": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "statement_index",
+              "novelty",
+              "explanation"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "theory_id",
+        "novelty",
+        "overall_support",
+        "explanation",
+        "statement_evaluations"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/novelty_assessment.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_evaluations": {
+      "items": {
+        "$ref": "#/$defs/theory_evaluation"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theory_evaluations",
+    "artifacts"
+  ],
+  "title": "novelty_assessment",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
new file mode 100644
index 0000000..2bd4ea8
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
@@ -0,0 +1,163 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "source_access": {
+      "additionalProperties": true,
+      "properties": {
+        "data_availability": {
+          "type": "string"
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "identifier": {
+          "type": "string"
+        },
+        "repository": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "data_availability",
+        "repository",
+        "identifier"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    },
+    "source_access": {
+      "items": {
+        "$ref": "#/$defs/source_access"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "source_access",
+    "artifacts"
+  ],
+  "title": "provenance_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
new file mode 100644
index 0000000..8a924d9
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
@@ -0,0 +1,107 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_source": {
+      "additionalProperties": true,
+      "properties": {
+        "dataset_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "paper_title": {
+          "type": "string"
+        },
+        "paper_url": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "dataset_id",
+        "paper_id",
+        "paper_title",
+        "paper_url"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_search.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_sources": {
+      "items": {
+        "$ref": "#/$defs/data_source"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "data_sources",
+    "artifacts"
+  ],
+  "title": "provenance_search",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
new file mode 100644
index 0000000..0d43a6f
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
@@ -0,0 +1,230 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "provenance_report": {
+      "additionalProperties": true,
+      "properties": {
+        "acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "not_acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sources": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "dataset_id",
+              "paper_title",
+              "paper_url",
+              "repository",
+              "access_status",
+              "local_path"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "sources",
+        "method_note",
+        "acquired",
+        "not_acquired",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "provenance_report": {
+      "$ref": "#/$defs/provenance_report"
+    }
+  },
+  "required": [
+    "provenance_report",
+    "artifacts"
+  ],
+  "title": "provenance_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
new file mode 100644
index 0000000..4bb9e6e
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
@@ -0,0 +1,29 @@
+%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+  law_extraction["law_extraction"]
+  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+  subgraph replication["replication (at replan)"]
+    replication__experiment_design["experiment_design<br/>asta experiment"]
+    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__adjudicate["adjudicate"]
+  end
+  class replication replan
+  reproduction_synthesis["reproduction_synthesis"]
+  data_driven_discovery --> law_extraction
+  law_extraction --> evidence_gathering
+  law_extraction --> replication__experiment_design
+  evidence_gathering --> replication__experiment_design
+  replication__experiment_design --> replication__analysis
+  evidence_gathering --> replication__analysis
+  replication__analysis --> replication__audit
+  replication__experiment_design --> replication__adjudicate
+  replication__analysis --> replication__adjudicate
+  replication__audit --> replication__adjudicate
+  law_extraction --> reproduction_synthesis
+  replication --> reproduction_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
new file mode 100644
index 0000000..570e076
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
@@ -0,0 +1,253 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "reproduction_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "laws_ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "outcome",
+              "testability",
+              "effect_size_source",
+              "effect_size_observed",
+              "independence_axes",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_failed_or_untestable": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_held": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "method_note",
+        "laws_ledger",
+        "what_held",
+        "what_failed_or_untestable",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/reproduction_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "reproduction_report": {
+      "$ref": "#/$defs/reproduction_report"
+    }
+  },
+  "required": [
+    "reproduction_report",
+    "artifacts"
+  ],
+  "title": "reproduction_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
new file mode 100644
index 0000000..8968920
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "testability_triage": {
+      "additionalProperties": true,
+      "properties": {
+        "assessments": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "available_data": {
+                "type": "string"
+              },
+              "gap": {
+                "type": "string"
+              },
+              "proposed_test": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "testable_now",
+              "available_data",
+              "required_data",
+              "proposed_test",
+              "gap"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "testable_theory_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "assessments",
+        "testable_theory_ids"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/testability_triage.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "testability_triage": {
+      "$ref": "#/$defs/testability_triage"
+    }
+  },
+  "required": [
+    "testability_triage",
+    "artifacts"
+  ],
+  "title": "testability_triage",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
new file mode 100644
index 0000000..59e2d0f
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
@@ -0,0 +1,27 @@
+%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph theory_generation["theory_generation"]
+    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+  end
+  testability_triage["testability_triage"]
+  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+  theory_synthesis["theory_synthesis"]
+  ext__adjudicate(["adjudicate (external)"]):::external
+  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
+  ext__evidence_gathering(["evidence_gathering (external)"]):::external
+  ext__law_extraction(["law_extraction (external)"]):::external
+  ext__law_extraction -.-> evidence_extraction
+  ext__adjudicate -.-> evidence_extraction
+  evidence_extraction --> theory_generation__theory_formation
+  theory_generation --> testability_triage
+  ext__data_driven_discovery -.-> testability_triage
+  ext__evidence_gathering -.-> testability_triage
+  testability_triage --> novelty_assessment
+  theory_generation --> theory_synthesis
+  novelty_assessment --> theory_synthesis
+  testability_triage --> theory_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
new file mode 100644
index 0000000..7373cec
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
@@ -0,0 +1,240 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory": {
+      "additionalProperties": true,
+      "properties": {
+        "components": {
+          "additionalProperties": true,
+          "properties": {
+            "generation_objective": {
+              "type": "string"
+            },
+            "new_predictions_likely": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "new_predictions_unknown": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "theory_statements": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "statement_name": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statement": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "statement_name",
+                  "theory_statement",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            },
+            "unaccounted_for": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "text": {
+                    "type": "string"
+                  },
+                  "uuids": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "text",
+                  "uuids"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "generation_objective",
+            "theory_statements",
+            "new_predictions_likely",
+            "new_predictions_unknown",
+            "unaccounted_for"
+          ],
+          "type": "object"
+        },
+        "description": {
+          "type": "string"
+        },
+        "grounds_law_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "objective": {
+          "enum": [
+            "accuracy_focused",
+            "novelty_focused"
+          ]
+        },
+        "supporting_evidence_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "theory_query": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "name",
+        "description",
+        "theory_query",
+        "objective",
+        "grounds_law_ids",
+        "supporting_evidence_ids",
+        "components"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theories": {
+      "items": {
+        "$ref": "#/$defs/theory"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theories",
+    "artifacts"
+  ],
+  "title": "theory_formation",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
new file mode 100644
index 0000000..dd2768e
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
@@ -0,0 +1,280 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "new_predictions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "novelty_summary": {
+          "type": "string"
+        },
+        "open_threads": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "theories": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "name": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "name",
+              "objective",
+              "one_line",
+              "grounds_law_ids",
+              "novelty",
+              "testable_now",
+              "supporting_evidence_ids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theories",
+        "novelty_summary",
+        "new_predictions",
+        "open_threads",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_report": {
+      "$ref": "#/$defs/theory_report"
+    }
+  },
+  "required": [
+    "theory_report",
+    "artifacts"
+  ],
+  "title": "theory_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
new file mode 100644
index 0000000..8d1a639
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
@@ -0,0 +1,232 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "verification_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "novelty_by_verification": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "audit_survived": {
+                "type": "boolean"
+              },
+              "claim": {
+                "type": "string"
+              },
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome",
+              "effect_size",
+              "data_used",
+              "audit_survived"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_could_not_be_tested": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_was_tested": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "novelty_by_verification",
+        "what_was_tested",
+        "what_could_not_be_tested",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/verification_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "verification_report": {
+      "$ref": "#/$defs/verification_report"
+    }
+  },
+  "required": [
+    "verification_report",
+    "artifacts"
+  ],
+  "title": "verification_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta-preview/skills/research-step/assets/schemas.yaml b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
index b9643b3..b5ead12 100644
--- a/plugins/asta-preview/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta-preview/skills/research-step/assets/schemas.yaml
@@ -1,48 +1,105 @@
-version: 1
+version: 2
+
+config:
+  # Session-tunable knobs and their defaults. A mission.md may override any of
+  # them in a `## Config` section (one `key: value` line each). plan's bootstrap
+  # resolves defaults + mission overrides and pins the result on the epic root
+  # (metadata.research_step.config); execute reads the pinned values from the
+  # epic root and passes them into the chain commands. Names match the field the
+  # consuming agent actually takes.
+  n_experiments: 10                # auto-ds: experiments per discovery run; set in the run-metadata
+                                   # JSON given to `asta autodiscovery metadata` (data_driven_discovery
+                                   # fresh runs, cohort_assembly/discovery_run)
+  max_papers_to_retrieve: 30       # generate-theories find-and-extract: papers to extract from
+                                   # (provenance_extraction, evidence_extraction, hypothesis_formation)
+  max_parallel_dv_runs: 5          # cap on concurrent DataVoyager (analyze-data) submissions when a
+                                   # step fans out runs in parallel (holdout_replication, analysis
+                                   # batches); submit up to this many, then wait before submitting more
 
 enums:
-  outcome:               [held, partial, failed, n/a]
+  outcome:               [held, partial, failed, underpowered, n/a]   # the one verdict vocabulary, for laws, theories, and hypotheses
   testability:           [tested, proxy_only, untestable]
   construct_equivalence: [equivalent, proxy, mismatch]
   feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
   independence_axis:     [region, instrument, method, construct, temporal, population]
   generation_objective:  [accuracy_focused, novelty_focused]
-  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  subject_kind:          [empirical_law, theory, hypothesis]
   novelty:               [established, derivable, genuinely_new]
-  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  support_level:         [supports, mixed, contradicts, inconclusive]
   priority:              [high, medium, low]
   access_status:         [acquired, open_unfetched, restricted, not_found]
-  holdout_verdict:       [held, failed, untested]
 
 types:
 
-  artifact:
-    artifactId: string
+  # Records are immutable: a task emits a record once; later stages never re-emit
+  # it with new values. Verdicts, enrichments, and acquisition results are their
+  # own records referencing the original by id (adjudication -> subject_id,
+  # source_access/acquisition -> data_source_id).
+  #
+  # Agent outputs nest VERBATIM: when a type carries another agent's record
+  # (theory.components, experiment rows, mcts_provenance), the agent's object is
+  # stored unmodified under its key - orchestrator annotations wrap it, never
+  # reach into or rename inside it - so a real agent payload always slots in.
+  # validate-output.sh deep-validates against the compiled JSON Schemas
+  # (assets/compiled/, regenerated by scripts/compile-schemas.py at build time):
+  # top-level output keys are closed, but nested objects stay open, so extra
+  # nested fields from real payloads are always permitted. A field name ending
+  # in `?` (e.g. mcts_provenance?) is optional; unmarked fields are required.
+
+  # --- Artifacts. The `artifacts` key on every task holds A2A 1.0 Artifacts,
+  # exactly as the spec defines them: an artifact carries an array of typed `parts`
+  # (wire field names, camelCase). A2A artifacts returned by chain commands are
+  # stored as received; locally produced byproducts (a rendered figure, a script,
+  # a data file) are wrapped in the same shape as file parts. Conventions on top
+  # of the spec:
+  #   - agents tag the artifact kind in metadata.type, e.g. extraction-schema |
+  #     extraction | theory | novelty | theory_store (theorizer) ·
+  #     paper-finder-search-result · widget_data_voyager (DV); local byproducts
+  #     use figure | code | data | log | experiment-design.
+  #   - local files are file parts in the *uri* form, uri = repo-root-relative
+  #     path under .asta/<agent>/<slug>/, with a mimeType (image/png,
+  #     text/x-python, text/csv, text/markdown, ...).
+  #   - never put the *bytes* form in output_json - beads caps metadata at ~64KB;
+  #     base64 payloads from agents (e.g. DV figures) are written to disk first
+  #     and referenced by uri.
+  # Byproducts always travel this channel; a thing the contract *requires*
+  # (e.g. an analysis's figures) is a typed output key.
+
+  artifact:                          # A2A 1.0 Artifact, verbatim
+    artifactId: string               # unique within the task (e.g. UUID, or <issue-id>-<n> for local byproducts)
     name: string
     description: string
-    parts: [object]
-    metadata: object
-
-  experiment:
-    experiment_id: string
-    status: string
-    hypothesis: string
+    parts: [part]
+    metadata?: object                # optional; metadata.type carries the artifact kind
+    extensions?: [string]            # optional; URIs of relevant A2A extensions
+
+  part:                              # A2A Part union, discriminated by `kind`
+    kind: string                     # text | file | data
+    metadata?: object                # optional, per part
+    # text: {kind: text, text: string}
+    # file: {kind: file, file: {uri: string, mimeType: string, name: string}}     - the only file form allowed in output_json
+    #       {kind: file, file: {bytes: base64, mimeType: string, name: string}}   - wire/disk only, never in output_json
+    # data: {kind: data, data: object} - structured payloads, stored as received
+
+  figure:                            # the report-embedding form: image is a repo-root-relative path
+    caption: string                  # (PNG/SVG), embedded via ![caption](path)
+    image: string
+
+  experiment:                        # an auto-ds experiments.json record; these four fields are the
+    experiment_id: string            # required projection - paste the full record in unchanged (extras
+    status: string                   # like experiment_plan, code, review, prior/posterior beliefs are
+    hypothesis: string               # permitted and preserved)
     analysis: string
 
-  empirical_law:
-    id: string
+  empirical_law:                     # identity of a discovered law; its verdict lives in the
+    id: string                       # adjudication that references it, never here
     statement: string
     construct: string
     source_operationalization: string
     source_node: string
-    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    effect_size_source: string       # the effect size as the source run/paper claims it
     grouping_rationale: string
-    outcome: outcome                       
-    testability: testability              
-    independence_axes: [independence_axis]
-    effect_size_source: string
-    effect_size_reproduction: string
-    replication_path: string
+    mcts_provenance?: {surprise: number, is_surprising: boolean, prior_belief: object, posterior_belief: object}   # optional; the auto-ds experiment record's search-signal fields, verbatim
 
   dataset:
     id: string
@@ -53,18 +110,25 @@ types:
     variables: [string]
     covers_laws: [string]
 
-  data_source:                       # links a run dataset to the paper and repository it came from
+  data_source:                       # the paper behind a run dataset; emitted once by provenance_search
     id: string
     dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
     paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
     paper_title: string
     paper_url: string
+
+  source_access:                     # provenance_extraction's enrichment, keyed by data_source id
+    data_source_id: string
     data_availability: string        # the paper's data-availability statement, verbatim or summarized
     repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
     identifier: string               # DOI / accession / direct URL for the data
+
+  acquisition:                       # data_acquisition's result, keyed by data_source id
+    data_source_id: string
     access_status: access_status     # acquired | open_unfetched | restricted | not_found
     local_path: string               # repo-root-relative path once acquired (else empty)
-    covers_laws: [string]
+    dataset_id: string               # the dataset registered from this source (empty if not acquired)
+    validation_note: string          # QC against the paper - n, schema/variables, units, missingness - or why not validated
 
   cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
     id: string
@@ -77,31 +141,46 @@ types:
     holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
     run_id: string                   # the stood-up auto-ds run (autodiscovery create)
 
-  reproduction_design:
-    law_id: string
+  experiment_design:                      # one test, committed before its analysis runs; used by the
+    subject_kind: subject_kind       # replication (law) and testing (hypothesis) branches
+    subject_id: string               # the law / theory / hypothesis under test
     experiment_name: string
     plain_language_description: string
-    original_operationalization: string
+    source_operationalization: string      # how the source measured it (empty for a novel hypothesis)
     independent_operationalization: string
     construct_equivalence: construct_equivalence
     feasibility: feasibility
     required_data: string
     data_gap: string
-
-  analysis:
-    final_answer: string
-    assumptions: [string]
-    figures: [{caption: string, image: string}]
+    experiment_design_query: string  # the natural-language query sent to the experiment designer (input provenance; empty when no designer ran)
+    prespecified:                    # the commitment adjudicate checks the result against
+      test: string                   # the statistical test / model
+      metric: string                 # the quantity that decides it
+      success_threshold: string      # what counts as held, incl. direction; note expected power / min detectable effect if known
+
+  analysis:                          # DataVoyager's TaskSummary, verbatim (figures are hoisted to the
+    final_answer: string             # task's `figures` output key after imageb64 -> PNG conversion)
+    assumptions: string              # a single text block, as the agent emits it
     code: string
 
   audit_report:
-    subject_id: string                     
-    analysis_id: string
-    challenges: [{concern: string, check: string, outcome: string}]
+    subject_id: string               # the law / theory / hypothesis whose analysis was audited
+    challenges: [{concern: string, check: string, outcome: string}]   # include one negative-control check (e.g. shuffled predictor)
     artifacts_found: [string]
     verdict_survives: boolean
     recommended_adjustment: string
 
+  adjudication:                      # the verdict record; references its subject, never mutates it
+    subject_kind: subject_kind
+    subject_id: string
+    outcome: outcome                 # held | partial | failed | underpowered | n/a
+    testability: testability
+    effect_size_observed: string
+    prespecified_check: string       # the observed metric vs the committed success_threshold
+    independence_axes: [independence_axis]
+    data_used: string
+    evidence: string
+
   extracted_data:
     id: string
     run_id: string
@@ -114,15 +193,29 @@ types:
         citation_title: string
         uuid: string
 
+  literature_review:                 # hypothesis_driven_research's survey output
+    summary: string
+    key_findings: [{text: string, uuids: [string]}]
+    open_gaps: [string]              # gaps that motivate hypotheses
+    citations: [{id: string, corpus_id: number, title: string, url: string, relevance: string}]   # corpus_id = canonical S2 corpusId; rows convert mechanically to PaperEntry seeds
+
+  hypothesis:                        # a slim, directly testable claim (hypothesis_driven_research)
+    id: string
+    statement: string
+    rationale: string                # why the literature implies it
+    falsifiable_prediction: string
+    grounds: [{text: string, uuids: [string]}]   # the evidence the rationale rests on
+
   theory:
     id: string
     name: string
     description: string
     theory_query: string
-    objective: generation_objective
-    grounds_law_ids: [string]
-    supporting_evidence_ids: [string]
-    components:
+    objective: generation_objective  # orchestrator annotation (the generation branch); the agent's own copy is components.generation_objective
+    grounds_law_ids: [string]        # orchestrator annotation - which laws ground this theory (no agent equivalent)
+    supporting_evidence_ids: [string]   # orchestrator annotation
+    components:                      # the theorizer's theory record, carried VERBATIM - never flatten or edit
+      generation_objective: string   # the agent's value as emitted (e.g. accuracy-focused)
       theory_statements:
         - statement_name: string
           theory_statement: string
@@ -138,28 +231,24 @@ types:
         testable_now: boolean
         available_data: string
         required_data: string
-        proposed_test: string
+        proposed_test: {test: string, metric: string, success_threshold: string}   # prespecified; the verification branch's adjudicate checks against it
         gap: string
     testable_theory_ids: [string]
 
   theory_evaluation:
     id: string
     theory_id: string
-    novelty: novelty
-    overall_support_or_contradict: string
-    overall_support_or_contradict_explanation: string
-
-  verification:
-    theory_id: string
-    prediction: string
-    verdict: verification_verdict
-    effect_size: string
-    data_used: string
-    audit_survived: boolean
-    analysis_id: string
+    novelty: novelty                 # rollup across statement_evaluations - the most novel statement wins
+    overall_support: support_level
+    overall_support_raw?: string     # the agent's untyped judgment, verbatim (optional)
+    explanation: string
+    statement_evaluations:           # the agent's real granularity - novelty is scored per statement
+      - statement_index: number
+        novelty: novelty
+        explanation: string
 
   next_run_proposal:
-    kind: next_step_kind
+    kind: string                     # the name of any key under flows: or tasks: in this file
     title: string
     tests: [string]
     data_needed: string
@@ -167,11 +256,12 @@ types:
     priority: priority
 
   # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
-  # theory_report, verification_report), one standalone data-gaps report, and a
-  # theory-led master (research_report). Each carries report_path (the .md deliverable
-  # written first), a title, a one-line headline, a typed body, and `links` back to the
-  # artifacts, tasks, and papers it rests on. Each sub-flow report exposes a local
-  # `gaps` list that gap_synthesis aggregates into the data_gaps_report.
+  # theory_report, verification_report, hypothesis_report, discovery_report), one
+  # standalone data-gaps report, and a theory-led master (research_report). Each
+  # carries report_path (the .md deliverable written first), a title, a one-line
+  # headline, a typed body, and `links` back to the artifacts, tasks, and papers it
+  # rests on. Each sub-flow report exposes a local `gaps` list that gap_synthesis
+  # aggregates into the data_gaps_report.
 
   provenance_report:
     report_path: string
@@ -184,8 +274,10 @@ types:
         repository: string
         access_status: access_status
         local_path: string
+    method_note: string              # how sources were matched and the data merged/validated (e.g. join key, resulting n vs the run's n)
     acquired: [string]
     not_acquired: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -200,11 +292,12 @@ types:
         outcome: outcome
         testability: testability
         effect_size_source: string
-        effect_size_reproduction: string
+        effect_size_observed: string
         independence_axes: [independence_axis]
         evidence: string
     what_held: [string]
     what_failed_or_untestable: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -225,6 +318,7 @@ types:
     novelty_summary: string
     new_predictions: [string]
     open_threads: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -236,12 +330,30 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verdict: verification_verdict
+        outcome: outcome
         effect_size: string
         data_used: string
         audit_survived: boolean
     what_was_tested: string
     what_could_not_be_tested: [string]
+    figures: [figure]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  hypothesis_report:                 # synthesis output of the hypothesis_driven_research flow
+    report_path: string
+    title: string
+    headline: string
+    question: string                 # the research question from mission.md
+    ledger:
+      - hypothesis_id: string
+        statement: string
+        outcome: outcome
+        effect_size_observed: string
+        evidence: string
+    answer: string                   # what the verdicts say about the question
+    open_questions: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -256,6 +368,7 @@ types:
         severity: priority
         arose_in: string
     next_steps: [next_run_proposal]
+    figures: [figure]
     links: [{label: string, ref: string}]
 
   research_report:
@@ -267,55 +380,76 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verification: verification_verdict
+        outcome: outcome
     inference_chain: [{claim: string, chain: [string]}]
     what_was_done: [string]
     sub_reports: [{kind: string, report_path: string, one_line: string}]
     tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+    figures: [figure]                # at least the one decisive figure, embedded in the report
+    links: [{label: string, ref: string}]
 
   discovery_report:                  # synthesis output of the auto_discovery flow
     report_path: string
     title: string
     headline: string
+    run_id: string                   # the discovery run, with its cohort sizes in the report header
     laws:
       - law_id: string
         statement: string
         surprise: number             # the discovery run's surprise signal for this candidate law
-        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        outcome: outcome             # from the held-out replication (untested branches are n/a)
         deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
-        effect_size: string
+        effect_size_discovery: string   # on the discovery subset
+        effect_size_holdout: string     # on the held-out subset - the pair shows replication shrinkage
+    interpretation: string           # what the run means against the question that motivated it
     next_steps: [next_run_proposal]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
+# Tasks are pure output contracts: output maps each output_json key to its type,
+# [type] meaning a JSON array of that type. Every task also carries artifacts.
+# A task's inputs are declared per flow step (the same output shape takes
+# different inputs in different flows), under `input:` in the flows below.
+
 tasks:
-  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
-  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
-  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
-  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
-  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
-  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
-  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
-  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
-  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
-  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
-  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
-  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
-  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
-  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
-  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
-  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
-  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
-  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
-  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
-  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
-  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
-  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
-  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
-  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
-  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
-  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
-  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+  provenance_search:      {output: {data_sources: [data_source], artifacts: [artifact]}}
+  provenance_extraction:  {output: {extracted_data: extracted_data, source_access: [source_access], artifacts: [artifact]}}
+  data_acquisition:       {output: {acquisitions: [acquisition], datasets: [dataset], artifacts: [artifact]}}
+  provenance_synthesis:   {output: {provenance_report: provenance_report, artifacts: [artifact]}}
+  data_driven_discovery:  {output: {experiments: [experiment], datasets: [dataset], artifacts: [artifact]}}
+  law_extraction:         {output: {empirical_laws: [empirical_law], artifacts: [artifact]}}
+  evidence_gathering:     {output: {datasets: [dataset], artifacts: [artifact]}}
+  experiment_design:           {output: {experiment_design: experiment_design, artifacts: [artifact]}}
+  analysis:               {output: {analysis: analysis, figures: [figure], artifacts: [artifact]}}
+  audit:                  {output: {audit_report: audit_report, artifacts: [artifact]}}
+  adjudicate:             {output: {adjudication: adjudication, artifacts: [artifact]}}
+  reproduction_synthesis: {output: {reproduction_report: reproduction_report, artifacts: [artifact]}}
+  evidence_extraction:    {output: {extracted_data: extracted_data, artifacts: [artifact]}}
+  theory_formation:       {output: {theories: [theory], artifacts: [artifact]}}
+  testability_triage:     {output: {testability_triage: testability_triage, artifacts: [artifact]}}
+  novelty_assessment:     {output: {theory_evaluations: [theory_evaluation], artifacts: [artifact]}}
+  theory_synthesis:       {output: {theory_report: theory_report, artifacts: [artifact]}}
+  verification_synthesis: {output: {verification_report: verification_report, artifacts: [artifact]}}
+  gap_synthesis:          {output: {data_gaps_report: data_gaps_report, artifacts: [artifact]}}
+  final_synthesis:        {output: {research_report: research_report, artifacts: [artifact]}}
+  # hypothesis_driven_research flow
+  literature_review:      {output: {literature_review: literature_review, artifacts: [artifact]}}
+  hypothesis_formation:   {output: {hypotheses: [hypothesis], artifacts: [artifact]}}
+  hypothesis_synthesis:   {output: {hypothesis_report: hypothesis_report, artifacts: [artifact]}}
+  # auto_discovery flow (its own session in a separate workspace: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {output: {cohort: cohort, datasets: [dataset], artifacts: [artifact]}}
+  discovery_run:          {output: {experiments: [experiment], empirical_laws: [empirical_law], artifacts: [artifact]}}
+  holdout_replication:    {output: {adjudication: adjudication, figures: [figure], artifacts: [artifact]}}
+  discovery_synthesis:    {output: {discovery_report: discovery_report, artifacts: [artifact]}}
+
+# Each flow step carries: mission (what the work is), input (the upstream steps
+# in this session whose issues plan wires as the task's inputs), and chain (the
+# asta commands). A node with a chain is a step; a node with only child nodes
+# and a mission is a group; a chain item {workflow: <flow>, mission: <text>}
+# expands the named sub-flow inline. A group whose branches are created at
+# replan (one per law / theory / hypothesis, once the naming step closes)
+# declares `replan: true`.
 
 flows:
 
@@ -334,103 +468,171 @@ flows:
       chain:
         - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.}
     verification:
-      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.
+      replan: true
       analysis:
-        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        mission: Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [testability_triage, data_driven_discovery, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_audit:
-        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_verification:
-        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+      adjudicate:
+        mission: Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.
+        input: [testability_triage, analysis, audit]
         chain: []
     verification_synthesis:
-      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.
+      input: [verification, novelty_assessment]
       chain: []
     gap_synthesis:
-      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis]
       chain: []
     final_synthesis:
-      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis, gap_synthesis]
       chain: []
 
   data_provenance:
     mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
     provenance_search:
-      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).
+      input: []
       chain: [asta literature find, asta papers search]
     provenance_extraction:
-      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.
+      input: [provenance_search]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     data_acquisition:
-      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      mission: For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.
+      input: [provenance_search, provenance_extraction]
       chain: [asta documents, asta autodiscovery upload]
     provenance_synthesis:
-      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.
+      input: [provenance_search, provenance_extraction, data_acquisition]
       chain: []
 
   reproduction:
-    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch analysis, not the ingested run.
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.
     data_driven_discovery:
-      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      input: []
       chain: [asta autodiscovery run, asta autodiscovery experiments]
     law_extraction:
-      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.
+      input: [data_driven_discovery]
       chain: []
     evidence_gathering:
-      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.
+      input: [law_extraction]
       chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
     replication:
       mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
-      reproduction_design:
-        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+      replan: true
+      experiment_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.
+        input: [law_extraction, evidence_gathering]
         chain: [asta experiment]
       analysis:
-        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        mission: Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduction_audit:
-        mission: Try to refute the analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduce:
-        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+      adjudicate:
+        mission: Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.
+        input: [experiment_design, analysis, audit]
         chain: []
     reproduction_synthesis:
-      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      input: [law_extraction, replication]
       chain: []
 
   theorizer:
     mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
     evidence_extraction:
-      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      mission: Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      input: [law_extraction, adjudicate]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     theory_generation:
       mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
       theory_formation:
         mission: Form theories from the shared extraction store under this branch's objective.
+        input: [evidence_extraction]
         chain: [asta generate-theories form-theory]
     testability_triage:
-      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.
+      input: [theory_generation, data_driven_discovery, evidence_gathering]
       chain: []
     novelty_assessment:
       mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      input: [testability_triage]
       chain: [asta generate-theories evaluate-novelty]
     theory_synthesis:
       mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      input: [theory_generation, novelty_assessment, testability_triage]
+      chain: []
+
+  hypothesis_driven_research:
+    mission: Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.
+    literature_review:
+      mission: Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.
+      input: []
+      chain: [asta literature find, asta papers search]
+    hypothesis_formation:
+      mission: Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.
+      input: [literature_review]
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    testing:
+      mission: One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.
+      replan: true
+      experiment_design:
+        mission: Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.
+        input: [hypothesis_formation, literature_review]
+        chain: [asta experiment]
+      data_acquisition:
+        mission: Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.
+        input: [experiment_design]
+        chain: [asta documents, asta autodiscovery upload]
+      analysis:
+        mission: Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, data_acquisition]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      adjudicate:
+        mission: Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold. Emit an adjudication referencing the hypothesis id.
+        input: [experiment_design, analysis, audit]
+        chain: []
+    hypothesis_synthesis:
+      mission: Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.
+      input: [hypothesis_formation, testing]
       chain: []
 
   auto_discovery:
-    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
     cohort_assembly:
-      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      input: []
       chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
     discovery_run:
-      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      mission: Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      input: [cohort_assembly]
       chain: [asta autodiscovery submit, asta autodiscovery experiments]
     replication:
       mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      replan: true
       holdout_replication:
-        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [discovery_run, cohort_assembly]
         chain: [asta analyze-data submit, asta analyze-data poll]
     discovery_synthesis:
-      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      mission: Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.
+      input: [discovery_run, replication]
       chain: []
diff --git a/plugins/asta-preview/skills/research-step/scripts/close-task.sh b/plugins/asta-preview/skills/research-step/scripts/close-task.sh
index 673b23f..7535a38 100755
--- a/plugins/asta-preview/skills/research-step/scripts/close-task.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/close-task.sh
@@ -16,7 +16,8 @@ jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2;
 cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
 merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
   '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
-tmp="$(mktemp)"; printf '%s' "$merged" > "$tmp"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$merged" > "$tmp"
 bd update "$id" --metadata @"$tmp" >/dev/null
 
 # 2. validate structurally (reads the issue back; no style lint)
@@ -28,17 +29,25 @@ bd close "$id" >/dev/null
   || { echo "close-task: $id did not close" >&2; exit 2; }
 echo "closed $id"
 
-# 5. cascade: close each ancestor group whose direct children are all closed
+# 5. cascade: close each ancestor group whose direct children are all closed.
+# The epic root is never closed here — "root open, no open tasks" is the
+# session-complete state that epic-root.sh and the workflows rely on.
 cur_id="$id"
 while [[ "$cur_id" == *.* ]]; do
   parent="${cur_id%.*}"
-  bd show "$parent" --json >/dev/null 2>&1 || break
-  open_kids="$(bd list --json | jq --arg p "$parent" '
+  parent_json="$(bd show "$parent" --json 2>/dev/null)" || break
+  [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break
+  open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" '
     [ .[]
       | select(.id | startswith($p + "."))
       | select((.id[($p|length)+1:] | contains(".")) | not)
       | select(.status != "closed") ] | length')"
   [[ "$open_kids" -eq 0 ]] || break
-  bd close "$parent" >/dev/null 2>&1 && echo "closed group $parent"
+  if bd close "$parent" >/dev/null 2>&1; then
+    echo "closed group $parent"
+  else
+    echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2
+    break
+  fi
   cur_id="$parent"
 done
diff --git a/plugins/asta-preview/skills/research-step/scripts/create-task.sh b/plugins/asta-preview/skills/research-step/scripts/create-task.sh
index 6024cf6..1e992a9 100755
--- a/plugins/asta-preview/skills/research-step/scripts/create-task.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/create-task.sh
@@ -5,16 +5,14 @@
 # execute publishes them via close-task.sh. Prints the new issue id.
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
 parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
 
-python3 - "$schemas" "$task_type" <<'PY' || { echo "create-task: unknown task_type '$2' (not in schemas.yaml)" >&2; exit 3; }
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
-PY
+# Validate the task_type against schemas.yaml. The helper exits 3 for an
+# unknown task_type (and prints the known ones) or 5 when the schema cannot
+# be read (e.g. PyYAML missing — run init); set -e propagates either.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 [[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
 [[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
@@ -22,6 +20,7 @@ PY
 
 if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
 meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
-  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
-tmp="$(mktemp)"; printf '%s' "$meta" > "$tmp"
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$meta" > "$tmp"
 bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/plugins/asta-preview/skills/research-step/scripts/epic-root.sh b/plugins/asta-preview/skills/research-step/scripts/epic-root.sh
index 13a7dfd..c176ef0 100755
--- a/plugins/asta-preview/skills/research-step/scripts/epic-root.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/epic-root.sh
@@ -33,7 +33,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-ids=$(bd list --json | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
+ids=$(bd list --json --limit 0 | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
 count=$(printf '%s' "$ids" | grep -c . || true)
 
 case "$count" in
diff --git a/plugins/asta-preview/skills/research-step/scripts/next-task.sh b/plugins/asta-preview/skills/research-step/scripts/next-task.sh
new file mode 100755
index 0000000..97e3592
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/scripts/next-task.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# next-task.sh — canonical task ordering for the research-step workflows.
+#
+# Lists every open task issue (status == open with
+# metadata.research_step.task_type set) ordered *numerically* by hierarchical
+# id, so wf.1.2 precedes wf.1.10 (a lexical sort breaks past 9 siblings).
+# Group issues carry no task_type and are never listed. With no dependency
+# edges in the graph, this order is the only ordering signal; execute (pick
+# the next task) and update-summary (render the queue) both read it here.
+#
+# Output (stdout, key: value lines):
+#   next:  <bd-id> | none
+#   queue: <space-separated bd-ids>   (omitted when empty)
+# Exit: 0 (even when next: none) · 3 bd/jq missing
+set -euo pipefail
+
+for tool in bd jq; do
+  command -v "$tool" >/dev/null 2>&1 || { echo "next-task: '$tool' not found on PATH" >&2; exit 3; }
+done
+
+ids="$(bd list --json --limit 0 | jq -r '
+  [ .[]
+    | select(.status == "open")
+    | select(.metadata.research_step.task_type != null) ]
+  | sort_by(.id | split(".") | map(tonumber? // .))
+  | .[].id')"
+
+[[ -n "$ids" ]] || { echo "next: none"; exit 0; }
+nl=$'\n'
+echo "next: ${ids%%$nl*}"
+if [[ "$ids" == *"$nl"* ]]; then
+  rest="${ids#*$nl}"
+  echo "queue: ${rest//$nl/ }"
+fi
diff --git a/plugins/asta-preview/skills/research-step/scripts/summary-check.sh b/plugins/asta-preview/skills/research-step/scripts/summary-check.sh
index 8d98b65..6a14470 100755
--- a/plugins/asta-preview/skills/research-step/scripts/summary-check.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/summary-check.sh
@@ -30,7 +30,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-current=$(bd list --json \
+current=$(bd list --json --limit 0 \
   | jq -r '.[] | select(.status != "closed") | .id' \
   | sort \
   | shasum -a 256 \
diff --git a/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh b/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh
new file mode 100755
index 0000000..ef1269b
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# task-output-keys.sh <task_type> — print the space-separated output keys for a
+# task from assets/schemas.yaml. The single schema reader for scripts:
+# create-task.sh and validate-output.sh call it to validate a task_type (both
+# discard the printed keys and rely on the exit status).
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema
+#       (python3/PyYAML missing or schemas.yaml unreadable/malformed — run init)
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; }
+command -v python3 >/dev/null 2>&1 || { echo "task-output-keys: 'python3' not found on PATH - run the init workflow" >&2; exit 5; }
+python3 - "$schemas" "$1" <<'PY'
+import sys
+
+try:
+    import yaml
+except ImportError:
+    print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+try:  # any load or shape failure is "cannot read schema" (5), never a traceback exiting 1
+    with open(sys.argv[1], encoding="utf-8") as f:
+        tasks = yaml.safe_load(f).get("tasks") or {}
+    t = tasks.get(sys.argv[2])
+    keys = None if t is None else " ".join(t["output"])
+except Exception as e:
+    print(f"task-output-keys: cannot read {sys.argv[1]}: {type(e).__name__}: {e}", file=sys.stderr)
+    sys.exit(5)
+
+if t is None:
+    print(f"task-output-keys: unknown task_type '{sys.argv[2]}'", file=sys.stderr)
+    print(f"task-output-keys: known: {' '.join(sorted(tasks))}", file=sys.stderr)
+    sys.exit(3)
+print(keys)
+PY
diff --git a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
index af3b8f6..69530f9 100755
--- a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh
@@ -1,12 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh <issue-id> — structural check of a task's stored output_json.
-# Reads the issue from beads, compiles assets/schemas.yaml, and checks that
-# metadata.research_step.output_json holds exactly tasks.<task_type>.output (incl. artifacts).
-# No style or quality linting.
-# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
+# Reads the issue from beads and deep-validates metadata.research_step.output_json
+# against the compiled JSON Schema (assets/compiled/<task_type>.schema.json,
+# regenerated from schemas.yaml by scripts/compile-schemas.py at build time):
+# top-level keys closed, declared nested fields required, extra nested fields
+# permitted (payloads nest verbatim). No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task
+#       · 4 schema violation
+#       · 5 schema unreadable (PyYAML/jsonschema missing or compiled schema
+#         absent — run the init workflow, or update the plugin)
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
 id="$1"
@@ -16,28 +20,46 @@ rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // e
 task_type="$(jq -r '.task_type // empty' <<<"$rs")"
 [[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
 
-expected="$(python3 - "$schemas" "$task_type" <<'PY'
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-t = d["tasks"].get(sys.argv[2])
-if t is None: sys.exit(3)
-print(" ".join(t["output"]))
-PY
-)" || { echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+# Exits 3 (unknown task_type) or 5 (schema unreadable) with its own message.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 got="$(jq -c '.output_json // empty' <<<"$rs")"
 [[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
 
-for k in $expected; do
-  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
-    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
-done
-while IFS= read -r k; do
-  case " $expected " in *" $k "*) ;; *)
-    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
-  esac
-done < <(jq -r 'keys[]' <<<"$got")
-jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
-  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }
+schema="$here/../assets/compiled/${task_type}.schema.json"
+[[ -r "$schema" ]] || {
+  echo "validate-output: compiled schema missing for '$task_type' ($schema) — update the plugin (it is regenerated at build time)" >&2
+  exit 5
+}
+OUTPUT_JSON="$got" python3 - "$schema" "$task_type" <<'PY'
+import json
+import os
+import sys
+
+try:
+    import jsonschema
+except ImportError:
+    print("validate-output: python3 cannot import jsonschema - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+with open(sys.argv[1]) as f:
+    schema = json.load(f)
+data = json.loads(os.environ["OUTPUT_JSON"])
+
+validator = jsonschema.Draft202012Validator(schema)
+errors = sorted(validator.iter_errors(data), key=lambda e: list(map(str, e.absolute_path)))
+if errors:
+    for e in errors[:5]:
+        path = ".".join(str(p) for p in e.absolute_path)
+        where = f"output_json.{path}" if path else "output_json"
+        hint = ""
+        if e.validator == "additionalProperties" and not path:
+            hint = " - byproducts go in artifacts"
+        print(f"validate-output: {where}: {e.message}{hint}", file=sys.stderr)
+    if len(errors) > 5:
+        print(f"validate-output: ... and {len(errors) - 5} more schema violation(s)", file=sys.stderr)
+    print(f"validate-output: output_json does not satisfy the '{sys.argv[2]}' schema", file=sys.stderr)
+    sys.exit(4)
+PY
 
 echo "ok"
diff --git a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
index 250ba36..6a9bbf6 100644
--- a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
+++ b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md
@@ -25,27 +25,27 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one. Record the chosen flow(s) in `mission.md` so `plan` can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)**: open `assets/schemas.yaml` and enumerate the keys under `flows:` — do **not** offer flows from memory; the file is the only source of the list, and each flow's purpose is in its `mission` field. A custom chain of `tasks:` entries is also an option. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so `plan` can read them. Also surface the session **config knobs** (the `config:` section of `assets/schemas.yaml`, e.g. `n_experiments`, `max_papers_to_retrieve`) with their defaults; record any non-default choices in a `## Config` section of `mission.md` (one `key: value` line each) — `plan` pins the resolved config on the epic at bootstrap. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
-- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / ready counts plus the single most-relevant ready task) and ask what they want to do next.
+- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / open-task counts plus the next task from `scripts/next-task.sh`) and ask what they want to do next.
 
 ### 3. Answer questions, preferring `summary.md`
 
-`summary.md` is the synthesized view of the session — mission, scope, definitions, related work, hypotheses, results, open questions, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
+`summary.md` is the synthesized view of the session — mission, flow(s), results so far (report headlines), gaps, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
 
-**Default path: read `summary.md`.** For most questions ("what's the current scope?", "which hypotheses are open?", "what's blocking progress?", "what's the state of H2?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
+**Default path: read `summary.md`.** For most questions ("which laws held?", "what theories came out?", "what's blocking progress?", "what's next?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
 
 **Drop down to beads only when the digest doesn't have the answer.** `summary.md` summarizes; some questions need the raw outputs:
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
 | Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
-| Task tree | `bd list --json` — ids encode the parent-child outline |
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
-| Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
+| Full issue metadata (rare; usually the digest covers it) | `bd list --all --limit 0` |
+| Task tree | `bd list --json --all --limit 0` — ids encode the parent-child outline |
+| Long-form content behind a report | follow `report_path` (or any `_path` field) from the issue's `output_json` |
+| Exact verdict / effect size for a law, theory, or hypothesis | `bd show <adjudicate-id> --json` (the adjudication record; the digest reports headlines, not the numbers) |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/execute.md b/plugins/asta-preview/skills/research-step/workflows/execute.md
index a8596e2..b4ba1ef 100644
--- a/plugins/asta-preview/skills/research-step/workflows/execute.md
+++ b/plugins/asta-preview/skills/research-step/workflows/execute.md
@@ -9,23 +9,33 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else pick the **open issue that has a `task_type` and the smallest hierarchical id** — `bd list --json`, keep `status == open` with `metadata.research_step.task_type != null`, sort by id, take the first. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
-2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
-   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
-   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
-6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
-7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+1. **Pick a task.** If a task ID was supplied, use it. Else run `scripts/next-task.sh` and take the `next:` id — it is the single definition of ordering (open issues with a `task_type`, numerically sorted by hierarchical id; `update-summary` renders the same order). `next: none` ⇒ report that and stop. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
+2. **Check readiness.** For every issue id in this task's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), verify it is `closed` with a non-null `output_json`. If any input is not ready, **stop and report it** — the graph was built out of order (a task left `in_progress`, or a replan misordering); do not improvise the missing input. This is the readiness check that dependency edges used to provide.
+3. **Claim it.** `bd update <id> --status=in_progress`.
+4. **Load the schema and config.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>.output` (a mapping of key → type; `[type]` means a JSON array of that type); find the step inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.experiment_design`) — and use its `mission`, `input`, and `chain`. Read the **session config** pinned on the epic root (`bd show <epic-id> --json | jq '.[0].metadata.research_step.config'`) and pass its values into the chain where they apply — `n_experiments` into the run-metadata JSON for `asta autodiscovery metadata`, `max_papers_to_retrieve` on `asta generate-theories find-and-extract`. Do not re-read defaults from schemas.yaml mid-session when a pin is present; the pin is the truth. (Sessions bootstrapped before config pinning was introduced have no pin; only in that case fall back to the schemas.yaml defaults.)
+5. **Gather inputs.** For every issue listed in this issue's `inputs`, read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+6. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`), and nothing else. Fill every typed field the schema declares (including typed verdicts like `adjudication.outcome` or `audit_report.verdict_survives`); only values with **no typed field** (an execution id, intermediate file paths, raw tool output) go in `artifacts`. Artifact rows are **A2A 1.0 Artifacts** — `{artifactId, name, description, parts, metadata}`, where `parts` is an array of text / file / data parts (see `artifact` and `part` in the schema). Artifacts returned by chain commands are stored as received (their kind in `metadata.type`); locally produced byproducts (a figure, a script, a data file) are wrapped as file parts in the uri form — repo-root-relative path plus mimeType — never the bytes form (beads' ~64KB cap). Records are immutable — emit verdicts and enrichments as their own records referencing the original by id (`adjudication.subject_id`, `source_access.data_source_id`); never re-emit an upstream record with changed values. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key, following the **Report conventions** below (entity hyperlinks, tables, figures). This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+7. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal the keys of `tasks.<task_type>.output` — which always include `artifacts` — none null; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed (it never closes the epic root — the session-complete state is root open with no open tasks). A non-zero exit **before** the `closed <id>` line means the issue is still `in_progress` — fix and re-run. A warning **after** `closed <id>` means the task closed but a group could not be auto-closed; close that group manually. The `description` is untouched; it stays the brief one-liner set at creation.
+8. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+
+## Report conventions
+
+These apply to every `output_markdown` and to every `*_synthesis` report deliverable. Rigorous but not over the top: a report stays roughly 50–100 lines; the detail behind it lives in artifacts it links to.
+
+- **Every named entity is a hyperlink.** Papers → DOI or canonical Semantic Scholar URL; datasets and result files → relative path; runs/experiments → their artifact or metadata file; laws/theories/hypotheses → their ledger row, written with an anchor (`<a id="l1"></a>`) so other reports can deep-link (`reproduction_report.md#l1`). A named thing with no link is a defect.
+- **Tables are the spine.** Any ledger, matrix, or catalog (laws × outcomes, theories × verdicts, sources × access) is a table with one row per record, mirroring the typed rows in `output_json`.
+- **Figures carry the quantitative claims.** Embed each one (`![caption](path)`) where the claim is made and list it in the `figures` output field. Analysis-type tasks must emit at least one figure; synthesis reports embed the figures their headline rests on (effect-size comparisons, verdict panels, discovery-vs-holdout shrinkage).
+- Neutral, third-person register; numbers in the text match the tables they summarize.
 
 ## Notes on output
 
-The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`).
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`). `output_json.artifacts` holds A2A Artifacts whose file parts reference those paths by uri; heavy payloads (base64 bytes, raw agent JSON) stay on disk, never inline.
 
 Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
+- `report_path` (from every `*_synthesis` report) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `hypothesis_report.md`, `data_gaps_report.md`).
 
 If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/init.md b/plugins/asta-preview/skills/research-step/workflows/init.md
index fd11be3..408c60f 100644
--- a/plugins/asta-preview/skills/research-step/workflows/init.md
+++ b/plugins/asta-preview/skills/research-step/workflows/init.md
@@ -1,6 +1,6 @@
 # Workflow: init
 
-Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
+Bootstrap the environment for a research session: install `bd`, `jq`, PyYAML, and jsonschema, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
 After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
@@ -32,12 +32,16 @@ Server mode (`bd init --server`) is out of scope: it requires running a Dolt sql
    - If no Dolt refs exist on the remote, surface the situation to the user with three options: (a) `bd import .beads/issues.jsonl` (fast, but discards Dolt history and any state newer than the export), (b) configure a Dolt remote and `bd dolt push` from another machine that has the live DB, then retry, (c) abort.
    - Pick one path only after explicit user confirmation. Never auto-import.
 
-4. **Verify the staleness check works.**
+4. **Ensure `python3` can import `yaml` (PyYAML) and `jsonschema`.** `scripts/task-output-keys.sh` (used by `create-task.sh` and `validate-output.sh`) parses `assets/schemas.yaml` with PyYAML; `validate-output.sh` deep-validates each task's `output_json` against the compiled schemas in `assets/compiled/` with jsonschema, and hard-fails (exit 5) without it.
+   - Probe with `python3 -c 'import yaml, jsonschema'`. If it succeeds, skip.
+   - Otherwise install what's missing: `python3 -m pip install --user pyyaml jsonschema` (or the platform equivalent, e.g. `apt-get install python3-yaml python3-jsonschema`). Re-probe; if it still fails, abort and ask the user.
+
+5. **Verify the staleness check works.**
    - Run `scripts/summary-check.sh`. It hashes the sorted IDs of currently-open issues and compares against `summary.md`'s frontmatter. Backend-agnostic — beads can use whichever storage it likes.
    - Requires `jq` on PATH; if missing, install it (`brew install jq`, `apt-get install jq`, etc.) and retry.
    - At init time `summary.md` does not yet exist, so the script will print `status: missing` and exit 1 — that's fine; **update-summary** will create the file later. `status: no-tools` (exit 3) means abort and ask the user.
 
-5. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
+6. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
 
 ## Cross-machine transfer
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/plan.md b/plugins/asta-preview/skills/research-step/workflows/plan.md
index a000e2d..444ee90 100644
--- a/plugins/asta-preview/skills/research-step/workflows/plan.md
+++ b/plugins/asta-preview/skills/research-step/workflows/plan.md
@@ -23,10 +23,10 @@ The flow in `assets/schemas.yaml` is an indented outline, and the beads graph yo
 
 Reading a flow node:
 
-- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
-- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`. Its `input:` names the upstream steps in this session whose issues you wire as the task's `inputs` (the same task type takes different inputs in different flows, so inputs live on the step, not the task).
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission`, `input`, and `chain` are never nodes.
 - A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
-- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`, `testing`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
 The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks):
 
@@ -38,10 +38,10 @@ wf                      [epic]    <mission>
   wf.1.3                          evidence_gathering
   wf.1.4                [fan-out] replication            one branch per law
    wf.1.4.1             [branch]  <law>
-    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.1                    experiment_design
     wf.1.4.1.2                    analysis
-    wf.1.4.1.3                    reproduction_audit
-    wf.1.4.1.4                    reproduce
+    wf.1.4.1.3                    audit
+    wf.1.4.1.4                    adjudicate
    wf.1.4.2             [branch]  <law> …
   wf.1.5                          reproduction_synthesis
 ```
@@ -50,35 +50,37 @@ The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproductio
 
 ## Ordering and closing (no edges)
 
-- **Next task = the open issue with a `task_type` and the smallest id.** Groups (no `task_type`) are never executed.
+- **Next task = the `next:` line printed by `scripts/next-task.sh`**, which lists the open issues that have a `task_type`, **numerically** sorted by hierarchical id (`wf.1.2` before `wf.1.10`). Groups (no `task_type`) are never executed. `execute` and `update-summary` both use this script, so they never disagree about what runs next.
 - Because you create in execution order, sequential steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
-- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. It never closes the **epic root**: "root open, no open tasks" is the session-complete state. Never close groups by hand.
 
 ## Static vs data-dependent fan-outs
 
 - **Static** (`theory_generation` by objective): both branches are known up front → create them together.
-- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+- **Data-dependent** (`replication` per law, `verification` per testable theory, `testing` per hypothesis): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`, `hypothesis_formation`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
 
 ## Gates (replan)
 
-- When `reproduction_design` closes: `feasibility` of `feasible`/`proxy_only` → create `analysis`, `reproduction_audit`, `reproduce` under that branch; `data_unavailable`/`construct_mismatch` → create only `reproduce` (it records the law `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created.
+- When `experiment_design` closes (in a `replication` or `testing` branch), branch on its `feasibility`: `feasible`/`proxy_only` → create the branch's remaining steps `analysis`, `audit`, `adjudicate` — in a `testing` branch, preceded by `data_acquisition` when the design names data not yet in hand (i.e. `[data_acquisition,] analysis`, `audit`, `adjudicate`); `data_unavailable`/`construct_mismatch` → create only `adjudicate` (it records `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap; in this case no `analysis` is created.
 - When `testability_triage` closes: create a `verification` branch only per theory in `testable_theory_ids`; the rest become `next_steps` in the final report.
+- When `hypothesis_formation` closes: create one `testing` branch per hypothesis.
 
 ## Bootstrap
 
 1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
-2. `bd create -t epic` the root from the mission, tagged `epic_root: true` + the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
-3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
-4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
+2. **Resolve the session config.** Start from the `config:` defaults in `assets/schemas.yaml`; apply any overrides from a `## Config` section in `mission.md` (one `key: value` line each; unknown keys are an error — surface them). The resolved map is pinned in the next step and never re-resolved mid-session.
+3. `bd create -t epic` the root from the mission, tagged with metadata `{"research_step": {"epic_root": true, "flow": "<flow>", "config": {<resolved config>}}}`. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+4. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+5. Report the epic id, the flow, the resolved config, the loop/group ids, and the frontier task ids.
 
 ## Replan
 
 When a step closes, create the next node(s) under their parent, in flow order:
 
-- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
-- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the group's steps under each via `create-task.sh` (record why for any branch the data can't support, rather than skipping it).
-- Apply the **Gates** rules above.
-- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
+- Create each step with `create-task.sh`. Its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling; the step's `input:` list in `schemas.yaml` names **which** upstream steps to wire.
+- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the branch steps under each via `create-task.sh` — **but a gated group lays only the steps up to its gate**: under a `replication` or `testing` branch create only `experiment_design`; the **Gates** rules above create the rest when that `experiment_design` closes. Ungated branches (`verification`: analysis, audit, adjudicate; `theory_generation`: theory_formation) get all their steps at branch creation. Record why for any branch the data can't support, rather than skipping it.
+- Apply the **Gates** rules above — they are the only creator of post-gate steps, so nothing is double-created.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `hypothesis_synthesis`, `discovery_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape.
 
 Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
diff --git a/plugins/asta-preview/skills/research-step/workflows/update-summary.md b/plugins/asta-preview/skills/research-step/workflows/update-summary.md
index 311c81a..a96a9fa 100644
--- a/plugins/asta-preview/skills/research-step/workflows/update-summary.md
+++ b/plugins/asta-preview/skills/research-step/workflows/update-summary.md
@@ -15,12 +15,11 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** Everything comes from `bd list --json`:
-   - the full tree (issue_count, status partition);
-   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
-   Project to `{id, task_type: .metadata.research_step.task_type, title}` and partition by `.status`.
+3. **Gather state inline.**
+   - `bd list --json --all --limit 0` for the full tree — `--all` because closed issues carry the results, `--limit 0` because bd truncates at 50 rows by default. Project to `{id, task_type: .metadata.research_step.task_type, title, status}` and partition by `.status`.
+   - `scripts/next-task.sh` for the **next task and the queue** (open task-type issues, numerically sorted by id — the same order `execute` uses). This replaces `bd ready`; there are no edges, so id order is the ordering signal.
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
-5. **Overwrite `summary.md`** using this template:
+5. **Overwrite `summary.md`** using this template (sections come from the **new taxonomy** — flows, laws, theories, reports — not from any per-flow hardcoding; render what the closed tasks' `output_json` actually contains):
 
    ```markdown
    ---
@@ -28,7 +27,7 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    beads_epic: <bd-id>
    generated_at: <ISO-8601 UTC>
    issue_count: <n>
-   ready_count: <n>
+   open_task_count: <n>
    ---
 
    # <mission title>
@@ -36,36 +35,29 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Mission
    <verbatim mission.md, or one-paragraph summary if long>
 
-   ## Research Question & Scope
-   <from scope issue's output, or "pending" if not yet closed>
+   ## Flow
+   <one line per flow this session runs (from task metadata `flow`), with where it
+   stands — e.g. "reproduction — replication branches 2/5 closed, synthesis pending">
 
-   ## Operational Definitions
-   <from definitions issue's output>
+   ## Results so far
+   <one subsection per closed `*_synthesis` task: the report's `headline` plus a link
+   to its `report_path`. Before any synthesis has closed, instead give one bullet per
+   closed task: "<bd-id> [<task_type>]: <one-line outcome from output_json>" — e.g.
+   laws extracted, datasets acquired, theories formed, verdicts finalized.>
 
-   ## Related Work
-   <literature_review.output.key_findings as bullets; link to summary_path>
-
-   ## Hypotheses
-   <one subsection per hypothesis issue: "H_n: <statement>" plus current verdict from its analysis if closed>
-
-   ## Experimental Designs
-   <one subsection per experiment_design, grouped under its hypothesis>
-
-   ## Results Summary
-   <table: hypothesis | verdict | confidence | analysis-id>
-
-   ## Open Questions
-   <synthesis.output.open_questions if synthesis exists, else aggregated from in-flight notes>
+   ## Gaps
+   <the `gaps` rows from closed report outputs (item — missing_data — severity),
+   or "none recorded">
 
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
+   - Open tasks: <n> — next: <`next:` from next-task.sh>; queue: <`queue:` line>
 
    ### Next Steps
-   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
+   <the queue from next-task.sh in order, one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If there are no open task issues, write "No open tasks — flow complete.">
+   If next-task.sh prints `next: none`, write "No open tasks — flow complete.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)
diff --git a/plugins/asta/skills/research-step/SKILL.md b/plugins/asta/skills/research-step/SKILL.md
index 49a7fec..e9f9a8c 100644
--- a/plugins/asta/skills/research-step/SKILL.md
+++ b/plugins/asta/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Bash(asta:*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically a separate epic kicked off after a theory-generation run), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types`, the `tasks` (typed `input`/`output` + a common `artifacts`), and the `flows` (each step carrying its `mission` + asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
+Models a research session as a beads epic. A session runs a **flow**: the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`); its sub-flows `reproduction` and `theorizer`; the standalone `hypothesis_driven_research` flow (literature → falsifiable hypotheses → one prespecified test per hypothesis); the standalone `auto_discovery` flow (source a cohort and run a fresh discovery); or a custom chain. Each flow's purpose is in its `mission` field in `assets/schemas.yaml`. Run `auto_discovery` as its own session in a **separate workspace** — own `mission.md` and `.beads` — typically kicked off after a theory-generation run; a second epic root in the same workspace breaks `scripts/epic-root.sh`. `assets/schemas.yaml` defines the reusable `types` (immutable records — verdicts are `adjudication` records referencing their subject), the `tasks` (pure output contracts mapping each output key to its type), and the `flows` (each step carrying its `mission`, its `input` steps, and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -23,7 +23,7 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 | `mission.md` | Input. The research task. |
 | `.beads/` | Source of truth for state. |
 | `summary.md` | Derived view of the session, regenerated by **update-summary**. Beads is the source of truth; this file is just a digest for humans and for **brainstorm**. Frontmatter `beads_snapshot` records the state it was rendered from. |
-| `background_knowledge.txt` | Optional. Long-form context referenced from issue metadata via `summary_path`. |
+| `.asta/<agent>/<slug>/` | Heavy artifacts (raw agent JSON, datasets, reports), referenced from `output_json` by repo-root-relative `_path` fields. |
 
 ## Workflows
 
@@ -51,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off rule in `execute.md`, last step); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json b/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
new file mode 100644
index 0000000..ccfb9d1
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/adjudicate.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "artifacts"
+  ],
+  "title": "adjudicate",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json b/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
new file mode 100644
index 0000000..55e557d
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
@@ -0,0 +1,119 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "analysis": {
+      "additionalProperties": true,
+      "properties": {
+        "assumptions": {
+          "type": "string"
+        },
+        "code": {
+          "type": "string"
+        },
+        "final_answer": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "final_answer",
+        "assumptions",
+        "code"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/analysis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "analysis": {
+      "$ref": "#/$defs/analysis"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "analysis",
+    "figures",
+    "artifacts"
+  ],
+  "title": "analysis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/audit.schema.json b/plugins/asta/skills/research-step/assets/compiled/audit.schema.json
new file mode 100644
index 0000000..ca21120
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/audit.schema.json
@@ -0,0 +1,127 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "audit_report": {
+      "additionalProperties": true,
+      "properties": {
+        "artifacts_found": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "challenges": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "check": {
+                "type": "string"
+              },
+              "concern": {
+                "type": "string"
+              },
+              "outcome": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "concern",
+              "check",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "recommended_adjustment": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "verdict_survives": {
+          "type": "boolean"
+        }
+      },
+      "required": [
+        "subject_id",
+        "challenges",
+        "artifacts_found",
+        "verdict_survives",
+        "recommended_adjustment"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/audit.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "audit_report": {
+      "$ref": "#/$defs/audit_report"
+    }
+  },
+  "required": [
+    "audit_report",
+    "artifacts"
+  ],
+  "title": "audit",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd b/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
new file mode 100644
index 0000000..14cd992
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
@@ -0,0 +1,18 @@
+%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
+  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
+  subgraph replication["replication (at replan)"]
+    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
+  end
+  class replication replan
+  discovery_synthesis["discovery_synthesis"]
+  cohort_assembly --> discovery_run
+  discovery_run --> replication__holdout_replication
+  cohort_assembly --> replication__holdout_replication
+  discovery_run --> discovery_synthesis
+  replication --> discovery_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json b/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
new file mode 100644
index 0000000..4866540
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
@@ -0,0 +1,206 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "cohort": {
+      "additionalProperties": true,
+      "properties": {
+        "discovery_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "exclusion_criteria": {
+          "type": "string"
+        },
+        "holdout_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "id": {
+          "type": "string"
+        },
+        "inclusion_criteria": {
+          "type": "string"
+        },
+        "research_question": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source_data_sources": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "research_question",
+        "inclusion_criteria",
+        "exclusion_criteria",
+        "sampling",
+        "source_data_sources",
+        "discovery_subset",
+        "holdout_subset",
+        "run_id"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/cohort_assembly.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "cohort": {
+      "$ref": "#/$defs/cohort"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "cohort",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "cohort_assembly",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json b/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
new file mode 100644
index 0000000..0bec23c
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
@@ -0,0 +1,161 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "acquisition": {
+      "additionalProperties": true,
+      "properties": {
+        "access_status": {
+          "enum": [
+            "acquired",
+            "open_unfetched",
+            "restricted",
+            "not_found"
+          ]
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "dataset_id": {
+          "type": "string"
+        },
+        "local_path": {
+          "type": "string"
+        },
+        "validation_note": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "access_status",
+        "local_path",
+        "dataset_id",
+        "validation_note"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_acquisition.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "acquisitions": {
+      "items": {
+        "$ref": "#/$defs/acquisition"
+      },
+      "type": "array"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "acquisitions",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_acquisition",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
new file mode 100644
index 0000000..cb56eed
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
@@ -0,0 +1,92 @@
+%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  subgraph data_provenance["data_provenance [flow: data_provenance]"]
+    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    data_provenance__provenance_synthesis["provenance_synthesis"]
+  end
+  class data_provenance embed
+  subgraph reproduction["reproduction [flow: reproduction]"]
+    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+    reproduction__law_extraction["law_extraction"]
+    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+    subgraph reproduction__replication["replication (at replan)"]
+      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
+      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__adjudicate["adjudicate"]
+    end
+    class reproduction__replication replan
+    reproduction__reproduction_synthesis["reproduction_synthesis"]
+  end
+  class reproduction embed
+  subgraph theorizer["theorizer [flow: theorizer]"]
+    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    subgraph theorizer__theory_generation["theory_generation"]
+      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+    end
+    theorizer__testability_triage["testability_triage"]
+    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+    theorizer__theory_synthesis["theory_synthesis"]
+  end
+  class theorizer embed
+  subgraph verification["verification (at replan)"]
+    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__adjudicate["adjudicate"]
+  end
+  class verification replan
+  verification_synthesis["verification_synthesis"]
+  gap_synthesis["gap_synthesis"]
+  final_synthesis["final_synthesis"]
+  data_provenance__provenance_search --> data_provenance__provenance_extraction
+  data_provenance__provenance_search --> data_provenance__data_acquisition
+  data_provenance__provenance_extraction --> data_provenance__data_acquisition
+  data_provenance__provenance_search --> data_provenance__provenance_synthesis
+  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
+  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
+  reproduction__data_driven_discovery --> reproduction__law_extraction
+  reproduction__law_extraction --> reproduction__evidence_gathering
+  reproduction__law_extraction --> reproduction__replication__experiment_design
+  reproduction__evidence_gathering --> reproduction__replication__experiment_design
+  reproduction__replication__experiment_design --> reproduction__replication__analysis
+  reproduction__evidence_gathering --> reproduction__replication__analysis
+  reproduction__replication__analysis --> reproduction__replication__audit
+  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
+  reproduction__replication__analysis --> reproduction__replication__adjudicate
+  reproduction__replication__audit --> reproduction__replication__adjudicate
+  reproduction__law_extraction --> reproduction__reproduction_synthesis
+  reproduction__replication --> reproduction__reproduction_synthesis
+  reproduction__law_extraction --> theorizer__evidence_extraction
+  reproduction__replication__adjudicate --> theorizer__evidence_extraction
+  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
+  theorizer__theory_generation --> theorizer__testability_triage
+  reproduction__data_driven_discovery --> theorizer__testability_triage
+  reproduction__evidence_gathering --> theorizer__testability_triage
+  theorizer__testability_triage --> theorizer__novelty_assessment
+  theorizer__theory_generation --> theorizer__theory_synthesis
+  theorizer__novelty_assessment --> theorizer__theory_synthesis
+  theorizer__testability_triage --> theorizer__theory_synthesis
+  theorizer__testability_triage --> verification__analysis
+  reproduction__data_driven_discovery --> verification__analysis
+  reproduction__evidence_gathering --> verification__analysis
+  verification__analysis --> verification__audit
+  theorizer__testability_triage --> verification__adjudicate
+  verification__analysis --> verification__adjudicate
+  verification__audit --> verification__adjudicate
+  verification --> verification_synthesis
+  theorizer__novelty_assessment --> verification_synthesis
+  data_provenance__provenance_synthesis --> gap_synthesis
+  reproduction__reproduction_synthesis --> gap_synthesis
+  theorizer__theory_synthesis --> gap_synthesis
+  verification_synthesis --> gap_synthesis
+  data_provenance__provenance_synthesis --> final_synthesis
+  reproduction__reproduction_synthesis --> final_synthesis
+  theorizer__theory_synthesis --> final_synthesis
+  verification_synthesis --> final_synthesis
+  gap_synthesis --> final_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
new file mode 100644
index 0000000..14f65a7
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
@@ -0,0 +1,152 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_driven_discovery.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_driven_discovery",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd b/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
new file mode 100644
index 0000000..3b46977
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
@@ -0,0 +1,16 @@
+%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+  provenance_synthesis["provenance_synthesis"]
+  provenance_search --> provenance_extraction
+  provenance_search --> data_acquisition
+  provenance_extraction --> data_acquisition
+  provenance_search --> provenance_synthesis
+  provenance_extraction --> provenance_synthesis
+  data_acquisition --> provenance_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json b/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
new file mode 100644
index 0000000..b7ac259
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
@@ -0,0 +1,170 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_run.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "discovery_run",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
new file mode 100644
index 0000000..29cb31f
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
@@ -0,0 +1,271 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "discovery_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "interpretation": {
+          "type": "string"
+        },
+        "laws": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "deciding_experiment": {
+                "type": "string"
+              },
+              "effect_size_discovery": {
+                "type": "string"
+              },
+              "effect_size_holdout": {
+                "type": "string"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "surprise": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "surprise",
+              "outcome",
+              "deciding_experiment",
+              "effect_size_discovery",
+              "effect_size_holdout"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "run_id",
+        "laws",
+        "interpretation",
+        "next_steps",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "discovery_report": {
+      "$ref": "#/$defs/discovery_report"
+    }
+  },
+  "required": [
+    "discovery_report",
+    "artifacts"
+  ],
+  "title": "discovery_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
new file mode 100644
index 0000000..7a53a5b
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
@@ -0,0 +1,132 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "artifacts"
+  ],
+  "title": "evidence_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json b/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
new file mode 100644
index 0000000..c310796
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
@@ -0,0 +1,121 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_gathering.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "datasets",
+    "artifacts"
+  ],
+  "title": "evidence_gathering",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json b/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
new file mode 100644
index 0000000..458fe42
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
@@ -0,0 +1,162 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "experiment_design": {
+      "additionalProperties": true,
+      "properties": {
+        "construct_equivalence": {
+          "enum": [
+            "equivalent",
+            "proxy",
+            "mismatch"
+          ]
+        },
+        "data_gap": {
+          "type": "string"
+        },
+        "experiment_design_query": {
+          "type": "string"
+        },
+        "experiment_name": {
+          "type": "string"
+        },
+        "feasibility": {
+          "enum": [
+            "feasible",
+            "proxy_only",
+            "data_unavailable",
+            "construct_mismatch"
+          ]
+        },
+        "independent_operationalization": {
+          "type": "string"
+        },
+        "plain_language_description": {
+          "type": "string"
+        },
+        "prespecified": {
+          "additionalProperties": true,
+          "properties": {
+            "metric": {
+              "type": "string"
+            },
+            "success_threshold": {
+              "type": "string"
+            },
+            "test": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "test",
+            "metric",
+            "success_threshold"
+          ],
+          "type": "object"
+        },
+        "required_data": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "experiment_name",
+        "plain_language_description",
+        "source_operationalization",
+        "independent_operationalization",
+        "construct_equivalence",
+        "feasibility",
+        "required_data",
+        "data_gap",
+        "experiment_design_query",
+        "prespecified"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/experiment_design.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "experiment_design": {
+      "$ref": "#/$defs/experiment_design"
+    }
+  },
+  "required": [
+    "experiment_design",
+    "artifacts"
+  ],
+  "title": "experiment_design",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
new file mode 100644
index 0000000..b00f085
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
@@ -0,0 +1,289 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "research_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "inference_chain": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "chain": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "claim": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "claim",
+              "chain"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sub_reports": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "report_path",
+              "one_line"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "tensions_and_surprises": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "evidence": {
+                "type": "string"
+              },
+              "observation": {
+                "type": "string"
+              },
+              "where": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "observation",
+              "where",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_highlights": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "claim": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_was_done": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theory_highlights",
+        "inference_chain",
+        "what_was_done",
+        "sub_reports",
+        "tensions_and_surprises",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/final_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "research_report": {
+      "$ref": "#/$defs/research_report"
+    }
+  },
+  "required": [
+    "research_report",
+    "artifacts"
+  ],
+  "title": "final_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/flows.json b/plugins/asta/skills/research-step/assets/compiled/flows.json
new file mode 100644
index 0000000..907a432
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/flows.json
@@ -0,0 +1,6657 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "flows": {
+    "auto_discovery": {
+      "edges": [
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "discovery_run"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "discovery_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "discovery_synthesis"
+        }
+      ],
+      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta documents",
+            "asta generate-theories find-and-extract",
+            "asta autodiscovery create",
+            "asta autodiscovery upload",
+            "asta autodiscovery metadata"
+          ],
+          "id": "cohort_assembly",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
+          "name": "cohort_assembly",
+          "parent": null,
+          "replan": false,
+          "task": "cohort_assembly"
+        },
+        {
+          "chain": [
+            "asta autodiscovery submit",
+            "asta autodiscovery experiments"
+          ],
+          "id": "discovery_run",
+          "inputs": [
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
+          "name": "discovery_run",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_run"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__holdout_replication",
+          "inputs": [
+            "discovery_run",
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "holdout_replication",
+          "parent": "replication",
+          "replan": false,
+          "task": "holdout_replication"
+        },
+        {
+          "chain": [],
+          "id": "discovery_synthesis",
+          "inputs": [
+            "discovery_run",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
+          "name": "discovery_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_synthesis"
+        }
+      ]
+    },
+    "data_and_literature_grounded_theory_generation": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_provenance__data_acquisition",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "reproduction__law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "reproduction__replication__audit",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "reproduction__replication",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "adjudicate",
+          "source": "reproduction__replication__adjudicate",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "theorizer__evidence_extraction",
+          "target": "theorizer__theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__audit"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "verification__audit",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "verification",
+          "source": "verification",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "gap_synthesis",
+          "source": "gap_synthesis",
+          "target": "final_synthesis"
+        }
+      ],
+      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
+      "nodes": [
+        {
+          "id": "data_provenance",
+          "kind": "embed",
+          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
+          "name": "data_provenance",
+          "parent": null,
+          "replan": false,
+          "workflow": "data_provenance"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "data_provenance__provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "data_provenance__provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_provenance__data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "data_provenance__provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_synthesis"
+        },
+        {
+          "id": "reproduction",
+          "kind": "embed",
+          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
+          "name": "reproduction",
+          "parent": null,
+          "replan": false,
+          "workflow": "reproduction"
+        },
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "reproduction__data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "reproduction__evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "reproduction__replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": "reproduction",
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "reproduction__replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "reproduction_synthesis"
+        },
+        {
+          "id": "theorizer",
+          "kind": "embed",
+          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
+          "name": "theorizer",
+          "parent": null,
+          "replan": false,
+          "workflow": "theorizer"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "theorizer__evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theorizer__theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": "theorizer",
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theorizer__theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theorizer__theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "theorizer__novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "verification",
+          "kind": "group",
+          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
+          "name": "verification",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__analysis",
+          "inputs": [
+            "testability_triage",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "verification",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "verification",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "verification__adjudicate",
+          "inputs": [
+            "testability_triage",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
+          "name": "adjudicate",
+          "parent": "verification",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "verification_synthesis",
+          "inputs": [
+            "verification",
+            "novelty_assessment"
+          ],
+          "kind": "step",
+          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether it survived the audit), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
+          "name": "verification_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "verification_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "gap_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
+          "name": "gap_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "gap_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "final_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis",
+            "gap_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
+          "name": "final_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "final_synthesis"
+        }
+      ]
+    },
+    "data_provenance": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_acquisition",
+          "target": "provenance_synthesis"
+        }
+      ],
+      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": null,
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_synthesis"
+        }
+      ]
+    },
+    "hypothesis_driven_research": {
+      "edges": [
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "hypothesis_formation"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "testing__data_acquisition",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "testing__audit",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "hypothesis_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testing",
+          "source": "testing",
+          "target": "hypothesis_synthesis"
+        }
+      ],
+      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "literature_review",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
+          "name": "literature_review",
+          "parent": null,
+          "replan": false,
+          "task": "literature_review"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "hypothesis_formation",
+          "inputs": [
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
+          "name": "hypothesis_formation",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_formation"
+        },
+        {
+          "id": "testing",
+          "kind": "group",
+          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
+          "name": "testing",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "testing__experiment_design",
+          "inputs": [
+            "hypothesis_formation",
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "testing",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "testing__data_acquisition",
+          "inputs": [
+            "experiment_design"
+          ],
+          "kind": "step",
+          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
+          "name": "data_acquisition",
+          "parent": "testing",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__analysis",
+          "inputs": [
+            "experiment_design",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "testing",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "testing",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "testing__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
+          "name": "adjudicate",
+          "parent": "testing",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "hypothesis_synthesis",
+          "inputs": [
+            "hypothesis_formation",
+            "testing"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
+          "name": "hypothesis_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_synthesis"
+        }
+      ]
+    },
+    "reproduction": {
+      "edges": [
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "data_driven_discovery",
+          "target": "law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "replication__audit",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "reproduction_synthesis"
+        }
+      ],
+      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held, partial, failed, underpowered, or n/a) crossed with testability (tested, proxy_only, or untestable) - and comes from the branch's adjudication, not the ingested run.",
+      "nodes": [
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "reproduction_synthesis"
+        }
+      ]
+    },
+    "theorizer": {
+      "edges": [
+        {
+          "external": true,
+          "input": "law_extraction",
+          "source": "ext__law_extraction",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": true,
+          "input": "adjudicate",
+          "source": "ext__adjudicate",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "evidence_extraction",
+          "target": "theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "data_driven_discovery",
+          "source": "ext__data_driven_discovery",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "evidence_gathering",
+          "source": "ext__evidence_gathering",
+          "target": "testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "novelty_assessment",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "theory_synthesis"
+        }
+      ],
+      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data in hand can actually test.",
+      "nodes": [
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": null,
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": null,
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": null,
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "ext__adjudicate",
+          "kind": "external",
+          "mission": "",
+          "name": "adjudicate",
+          "parent": null,
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "id": "ext__data_driven_discovery",
+          "kind": "external",
+          "mission": "",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "id": "ext__evidence_gathering",
+          "kind": "external",
+          "mission": "",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "ext__law_extraction",
+          "kind": "external",
+          "mission": "",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        }
+      ]
+    }
+  },
+  "format_version": 1,
+  "schema_version": 2,
+  "tasks": {
+    "adjudicate": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/adjudicate.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "artifacts"
+        ],
+        "title": "adjudicate",
+        "type": "object"
+      }
+    },
+    "analysis": {
+      "output": {
+        "analysis": "analysis",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "analysis": {
+            "additionalProperties": true,
+            "properties": {
+              "assumptions": {
+                "type": "string"
+              },
+              "code": {
+                "type": "string"
+              },
+              "final_answer": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "final_answer",
+              "assumptions",
+              "code"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/analysis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "analysis": {
+            "$ref": "#/$defs/analysis"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "analysis",
+          "figures",
+          "artifacts"
+        ],
+        "title": "analysis",
+        "type": "object"
+      }
+    },
+    "audit": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "audit_report": "audit_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "audit_report": {
+            "additionalProperties": true,
+            "properties": {
+              "artifacts_found": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "challenges": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "check": {
+                      "type": "string"
+                    },
+                    "concern": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "concern",
+                    "check",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "recommended_adjustment": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "verdict_survives": {
+                "type": "boolean"
+              }
+            },
+            "required": [
+              "subject_id",
+              "challenges",
+              "artifacts_found",
+              "verdict_survives",
+              "recommended_adjustment"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/audit.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "audit_report": {
+            "$ref": "#/$defs/audit_report"
+          }
+        },
+        "required": [
+          "audit_report",
+          "artifacts"
+        ],
+        "title": "audit",
+        "type": "object"
+      }
+    },
+    "cohort_assembly": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "cohort": "cohort",
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "cohort": {
+            "additionalProperties": true,
+            "properties": {
+              "discovery_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "exclusion_criteria": {
+                "type": "string"
+              },
+              "holdout_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "id": {
+                "type": "string"
+              },
+              "inclusion_criteria": {
+                "type": "string"
+              },
+              "research_question": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source_data_sources": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "research_question",
+              "inclusion_criteria",
+              "exclusion_criteria",
+              "sampling",
+              "source_data_sources",
+              "discovery_subset",
+              "holdout_subset",
+              "run_id"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/cohort_assembly.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "cohort": {
+            "$ref": "#/$defs/cohort"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "cohort",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "cohort_assembly",
+        "type": "object"
+      }
+    },
+    "data_acquisition": {
+      "output": {
+        "acquisitions": [
+          "acquisition"
+        ],
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "acquisition": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "validation_note": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "access_status",
+              "local_path",
+              "dataset_id",
+              "validation_note"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_acquisition.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "acquisitions": {
+            "items": {
+              "$ref": "#/$defs/acquisition"
+            },
+            "type": "array"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "acquisitions",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_acquisition",
+        "type": "object"
+      }
+    },
+    "data_driven_discovery": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_driven_discovery.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_driven_discovery",
+        "type": "object"
+      }
+    },
+    "discovery_run": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_run.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "discovery_run",
+        "type": "object"
+      }
+    },
+    "discovery_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "discovery_report": "discovery_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "discovery_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "interpretation": {
+                "type": "string"
+              },
+              "laws": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "deciding_experiment": {
+                      "type": "string"
+                    },
+                    "effect_size_discovery": {
+                      "type": "string"
+                    },
+                    "effect_size_holdout": {
+                      "type": "string"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "surprise": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "surprise",
+                    "outcome",
+                    "deciding_experiment",
+                    "effect_size_discovery",
+                    "effect_size_holdout"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "run_id",
+              "laws",
+              "interpretation",
+              "next_steps",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "discovery_report": {
+            "$ref": "#/$defs/discovery_report"
+          }
+        },
+        "required": [
+          "discovery_report",
+          "artifacts"
+        ],
+        "title": "discovery_synthesis",
+        "type": "object"
+      }
+    },
+    "evidence_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "artifacts"
+        ],
+        "title": "evidence_extraction",
+        "type": "object"
+      }
+    },
+    "evidence_gathering": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_gathering.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "datasets",
+          "artifacts"
+        ],
+        "title": "evidence_gathering",
+        "type": "object"
+      }
+    },
+    "experiment_design": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "experiment_design": "experiment_design"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "experiment_design": {
+            "additionalProperties": true,
+            "properties": {
+              "construct_equivalence": {
+                "enum": [
+                  "equivalent",
+                  "proxy",
+                  "mismatch"
+                ]
+              },
+              "data_gap": {
+                "type": "string"
+              },
+              "experiment_design_query": {
+                "type": "string"
+              },
+              "experiment_name": {
+                "type": "string"
+              },
+              "feasibility": {
+                "enum": [
+                  "feasible",
+                  "proxy_only",
+                  "data_unavailable",
+                  "construct_mismatch"
+                ]
+              },
+              "independent_operationalization": {
+                "type": "string"
+              },
+              "plain_language_description": {
+                "type": "string"
+              },
+              "prespecified": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "experiment_name",
+              "plain_language_description",
+              "source_operationalization",
+              "independent_operationalization",
+              "construct_equivalence",
+              "feasibility",
+              "required_data",
+              "data_gap",
+              "experiment_design_query",
+              "prespecified"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/experiment_design.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "experiment_design": {
+            "$ref": "#/$defs/experiment_design"
+          }
+        },
+        "required": [
+          "experiment_design",
+          "artifacts"
+        ],
+        "title": "experiment_design",
+        "type": "object"
+      }
+    },
+    "final_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "research_report": "research_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "research_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "inference_chain": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "chain": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "claim": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "claim",
+                    "chain"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sub_reports": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "kind": {
+                      "type": "string"
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "report_path": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "kind",
+                    "report_path",
+                    "one_line"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "tensions_and_surprises": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "observation": {
+                      "type": "string"
+                    },
+                    "where": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "observation",
+                    "where",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_highlights": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "claim": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_was_done": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theory_highlights",
+              "inference_chain",
+              "what_was_done",
+              "sub_reports",
+              "tensions_and_surprises",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/final_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "research_report": {
+            "$ref": "#/$defs/research_report"
+          }
+        },
+        "required": [
+          "research_report",
+          "artifacts"
+        ],
+        "title": "final_synthesis",
+        "type": "object"
+      }
+    },
+    "gap_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_gaps_report": "data_gaps_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_gaps_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "arose_in": {
+                      "type": "string"
+                    },
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity",
+                    "arose_in"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "gaps",
+              "next_steps",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/gap_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_gaps_report": {
+            "$ref": "#/$defs/data_gaps_report"
+          }
+        },
+        "required": [
+          "data_gaps_report",
+          "artifacts"
+        ],
+        "title": "gap_synthesis",
+        "type": "object"
+      }
+    },
+    "holdout_replication": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/holdout_replication.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "figures",
+          "artifacts"
+        ],
+        "title": "holdout_replication",
+        "type": "object"
+      }
+    },
+    "hypothesis_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypotheses": [
+          "hypothesis"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "hypothesis": {
+            "additionalProperties": true,
+            "properties": {
+              "falsifiable_prediction": {
+                "type": "string"
+              },
+              "grounds": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "rationale": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "rationale",
+              "falsifiable_prediction",
+              "grounds"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypotheses": {
+            "items": {
+              "$ref": "#/$defs/hypothesis"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "hypotheses",
+          "artifacts"
+        ],
+        "title": "hypothesis_formation",
+        "type": "object"
+      }
+    },
+    "hypothesis_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypothesis_report": "hypothesis_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "hypothesis_report": {
+            "additionalProperties": true,
+            "properties": {
+              "answer": {
+                "type": "string"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "hypothesis_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "hypothesis_id",
+                    "statement",
+                    "outcome",
+                    "effect_size_observed",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_questions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "question": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "question",
+              "ledger",
+              "answer",
+              "open_questions",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypothesis_report": {
+            "$ref": "#/$defs/hypothesis_report"
+          }
+        },
+        "required": [
+          "hypothesis_report",
+          "artifacts"
+        ],
+        "title": "hypothesis_synthesis",
+        "type": "object"
+      }
+    },
+    "law_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/law_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "law_extraction",
+        "type": "object"
+      }
+    },
+    "literature_review": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "literature_review": "literature_review"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "literature_review": {
+            "additionalProperties": true,
+            "properties": {
+              "citations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "corpus_id": {
+                      "type": "number"
+                    },
+                    "id": {
+                      "type": "string"
+                    },
+                    "relevance": {
+                      "type": "string"
+                    },
+                    "title": {
+                      "type": "string"
+                    },
+                    "url": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "id",
+                    "corpus_id",
+                    "title",
+                    "url",
+                    "relevance"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "key_findings": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_gaps": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "summary": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "summary",
+              "key_findings",
+              "open_gaps",
+              "citations"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/literature_review.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "literature_review": {
+            "$ref": "#/$defs/literature_review"
+          }
+        },
+        "required": [
+          "literature_review",
+          "artifacts"
+        ],
+        "title": "literature_review",
+        "type": "object"
+      }
+    },
+    "novelty_assessment": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_evaluations": [
+          "theory_evaluation"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_evaluation": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "overall_support": {
+                "enum": [
+                  "supports",
+                  "mixed",
+                  "contradicts",
+                  "inconclusive"
+                ]
+              },
+              "overall_support_raw": {
+                "type": "string"
+              },
+              "statement_evaluations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "explanation": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "statement_index": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "statement_index",
+                    "novelty",
+                    "explanation"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "theory_id",
+              "novelty",
+              "overall_support",
+              "explanation",
+              "statement_evaluations"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/novelty_assessment.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_evaluations": {
+            "items": {
+              "$ref": "#/$defs/theory_evaluation"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theory_evaluations",
+          "artifacts"
+        ],
+        "title": "novelty_assessment",
+        "type": "object"
+      }
+    },
+    "provenance_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data",
+        "source_access": [
+          "source_access"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "source_access": {
+            "additionalProperties": true,
+            "properties": {
+              "data_availability": {
+                "type": "string"
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "identifier": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "data_availability",
+              "repository",
+              "identifier"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          },
+          "source_access": {
+            "items": {
+              "$ref": "#/$defs/source_access"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "source_access",
+          "artifacts"
+        ],
+        "title": "provenance_extraction",
+        "type": "object"
+      }
+    },
+    "provenance_search": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_sources": [
+          "data_source"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_source": {
+            "additionalProperties": true,
+            "properties": {
+              "dataset_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "dataset_id",
+              "paper_id",
+              "paper_title",
+              "paper_url"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_search.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_sources": {
+            "items": {
+              "$ref": "#/$defs/data_source"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "data_sources",
+          "artifacts"
+        ],
+        "title": "provenance_search",
+        "type": "object"
+      }
+    },
+    "provenance_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "provenance_report": "provenance_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "provenance_report": {
+            "additionalProperties": true,
+            "properties": {
+              "acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "not_acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sources": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "access_status": {
+                      "enum": [
+                        "acquired",
+                        "open_unfetched",
+                        "restricted",
+                        "not_found"
+                      ]
+                    },
+                    "dataset_id": {
+                      "type": "string"
+                    },
+                    "local_path": {
+                      "type": "string"
+                    },
+                    "paper_title": {
+                      "type": "string"
+                    },
+                    "paper_url": {
+                      "type": "string"
+                    },
+                    "repository": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "dataset_id",
+                    "paper_title",
+                    "paper_url",
+                    "repository",
+                    "access_status",
+                    "local_path"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "sources",
+              "method_note",
+              "acquired",
+              "not_acquired",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "provenance_report": {
+            "$ref": "#/$defs/provenance_report"
+          }
+        },
+        "required": [
+          "provenance_report",
+          "artifacts"
+        ],
+        "title": "provenance_synthesis",
+        "type": "object"
+      }
+    },
+    "reproduction_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "reproduction_report": "reproduction_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "reproduction_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "laws_ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "effect_size_source": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "independence_axes": {
+                      "items": {
+                        "enum": [
+                          "region",
+                          "instrument",
+                          "method",
+                          "construct",
+                          "temporal",
+                          "population"
+                        ]
+                      },
+                      "type": "array"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "testability": {
+                      "enum": [
+                        "tested",
+                        "proxy_only",
+                        "untestable"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "outcome",
+                    "testability",
+                    "effect_size_source",
+                    "effect_size_observed",
+                    "independence_axes",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_failed_or_untestable": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_held": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "method_note",
+              "laws_ledger",
+              "what_held",
+              "what_failed_or_untestable",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/reproduction_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "reproduction_report": {
+            "$ref": "#/$defs/reproduction_report"
+          }
+        },
+        "required": [
+          "reproduction_report",
+          "artifacts"
+        ],
+        "title": "reproduction_synthesis",
+        "type": "object"
+      }
+    },
+    "testability_triage": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "testability_triage": "testability_triage"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "testability_triage": {
+            "additionalProperties": true,
+            "properties": {
+              "assessments": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "available_data": {
+                      "type": "string"
+                    },
+                    "gap": {
+                      "type": "string"
+                    },
+                    "proposed_test": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "metric": {
+                          "type": "string"
+                        },
+                        "success_threshold": {
+                          "type": "string"
+                        },
+                        "test": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "test",
+                        "metric",
+                        "success_threshold"
+                      ],
+                      "type": "object"
+                    },
+                    "required_data": {
+                      "type": "string"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "testable_now",
+                    "available_data",
+                    "required_data",
+                    "proposed_test",
+                    "gap"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "testable_theory_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "assessments",
+              "testable_theory_ids"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/testability_triage.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "testability_triage": {
+            "$ref": "#/$defs/testability_triage"
+          }
+        },
+        "required": [
+          "testability_triage",
+          "artifacts"
+        ],
+        "title": "testability_triage",
+        "type": "object"
+      }
+    },
+    "theory_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theories": [
+          "theory"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory": {
+            "additionalProperties": true,
+            "properties": {
+              "components": {
+                "additionalProperties": true,
+                "properties": {
+                  "generation_objective": {
+                    "type": "string"
+                  },
+                  "new_predictions_likely": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "new_predictions_unknown": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statements": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "conflicting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "statement_name": {
+                          "type": "string"
+                        },
+                        "supporting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "theory_statement": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "statement_name",
+                        "theory_statement",
+                        "supporting_evidence",
+                        "conflicting_evidence"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "unaccounted_for": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "generation_objective",
+                  "theory_statements",
+                  "new_predictions_likely",
+                  "new_predictions_unknown",
+                  "unaccounted_for"
+                ],
+                "type": "object"
+              },
+              "description": {
+                "type": "string"
+              },
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "name": {
+                "type": "string"
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "theory_query": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "name",
+              "description",
+              "theory_query",
+              "objective",
+              "grounds_law_ids",
+              "supporting_evidence_ids",
+              "components"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theories": {
+            "items": {
+              "$ref": "#/$defs/theory"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theories",
+          "artifacts"
+        ],
+        "title": "theory_formation",
+        "type": "object"
+      }
+    },
+    "theory_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_report": "theory_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "new_predictions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "novelty_summary": {
+                "type": "string"
+              },
+              "open_threads": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "theories": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "grounds_law_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "name": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "objective": {
+                      "enum": [
+                        "accuracy_focused",
+                        "novelty_focused"
+                      ]
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "supporting_evidence_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "name",
+                    "objective",
+                    "one_line",
+                    "grounds_law_ids",
+                    "novelty",
+                    "testable_now",
+                    "supporting_evidence_ids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theories",
+              "novelty_summary",
+              "new_predictions",
+              "open_threads",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_report": {
+            "$ref": "#/$defs/theory_report"
+          }
+        },
+        "required": [
+          "theory_report",
+          "artifacts"
+        ],
+        "title": "theory_synthesis",
+        "type": "object"
+      }
+    },
+    "verification_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "verification_report": "verification_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "verification_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "novelty_by_verification": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "audit_survived": {
+                      "type": "boolean"
+                    },
+                    "claim": {
+                      "type": "string"
+                    },
+                    "data_used": {
+                      "type": "string"
+                    },
+                    "effect_size": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome",
+                    "effect_size",
+                    "data_used",
+                    "audit_survived"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_could_not_be_tested": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_was_tested": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "novelty_by_verification",
+              "what_was_tested",
+              "what_could_not_be_tested",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/verification_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "verification_report": {
+            "$ref": "#/$defs/verification_report"
+          }
+        },
+        "required": [
+          "verification_report",
+          "artifacts"
+        ],
+        "title": "verification_synthesis",
+        "type": "object"
+      }
+    }
+  }
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
new file mode 100644
index 0000000..760fbb5
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
@@ -0,0 +1,221 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_gaps_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "arose_in": {
+                "type": "string"
+              },
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity",
+              "arose_in"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "gaps",
+        "next_steps",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/gap_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_gaps_report": {
+      "$ref": "#/$defs/data_gaps_report"
+    }
+  },
+  "required": [
+    "data_gaps_report",
+    "artifacts"
+  ],
+  "title": "gap_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json b/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
new file mode 100644
index 0000000..9d18252
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
@@ -0,0 +1,167 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/holdout_replication.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "figures",
+    "artifacts"
+  ],
+  "title": "holdout_replication",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
new file mode 100644
index 0000000..e996ef7
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
@@ -0,0 +1,29 @@
+%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  literature_review["literature_review<br/>asta literature find · asta papers search"]
+  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph testing["testing (at replan)"]
+    testing__experiment_design["experiment_design<br/>asta experiment"]
+    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__adjudicate["adjudicate"]
+  end
+  class testing replan
+  hypothesis_synthesis["hypothesis_synthesis"]
+  literature_review --> hypothesis_formation
+  hypothesis_formation --> testing__experiment_design
+  literature_review --> testing__experiment_design
+  testing__experiment_design --> testing__data_acquisition
+  testing__experiment_design --> testing__analysis
+  testing__data_acquisition --> testing__analysis
+  testing__analysis --> testing__audit
+  testing__experiment_design --> testing__adjudicate
+  testing__analysis --> testing__adjudicate
+  testing__audit --> testing__adjudicate
+  hypothesis_formation --> hypothesis_synthesis
+  testing --> hypothesis_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
new file mode 100644
index 0000000..694d94f
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
@@ -0,0 +1,126 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "hypothesis": {
+      "additionalProperties": true,
+      "properties": {
+        "falsifiable_prediction": {
+          "type": "string"
+        },
+        "grounds": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "rationale": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "rationale",
+        "falsifiable_prediction",
+        "grounds"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypotheses": {
+      "items": {
+        "$ref": "#/$defs/hypothesis"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "hypotheses",
+    "artifacts"
+  ],
+  "title": "hypothesis_formation",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
new file mode 100644
index 0000000..b2fe767
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
@@ -0,0 +1,224 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "hypothesis_report": {
+      "additionalProperties": true,
+      "properties": {
+        "answer": {
+          "type": "string"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "hypothesis_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "hypothesis_id",
+              "statement",
+              "outcome",
+              "effect_size_observed",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_questions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "question": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "question",
+        "ledger",
+        "answer",
+        "open_questions",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypothesis_report": {
+      "$ref": "#/$defs/hypothesis_report"
+    }
+  },
+  "required": [
+    "hypothesis_report",
+    "artifacts"
+  ],
+  "title": "hypothesis_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
new file mode 100644
index 0000000..7b3e1fc
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
@@ -0,0 +1,139 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/law_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "law_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json b/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
new file mode 100644
index 0000000..14df7b7
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
@@ -0,0 +1,150 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "literature_review": {
+      "additionalProperties": true,
+      "properties": {
+        "citations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "corpus_id": {
+                "type": "number"
+              },
+              "id": {
+                "type": "string"
+              },
+              "relevance": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "corpus_id",
+              "title",
+              "url",
+              "relevance"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "key_findings": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_gaps": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "summary": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "summary",
+        "key_findings",
+        "open_gaps",
+        "citations"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/literature_review.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "literature_review": {
+      "$ref": "#/$defs/literature_review"
+    }
+  },
+  "required": [
+    "literature_review",
+    "artifacts"
+  ],
+  "title": "literature_review",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json b/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
new file mode 100644
index 0000000..729f9fe
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
@@ -0,0 +1,147 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_evaluation": {
+      "additionalProperties": true,
+      "properties": {
+        "explanation": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "novelty": {
+          "enum": [
+            "established",
+            "derivable",
+            "genuinely_new"
+          ]
+        },
+        "overall_support": {
+          "enum": [
+            "supports",
+            "mixed",
+            "contradicts",
+            "inconclusive"
+          ]
+        },
+        "overall_support_raw": {
+          "type": "string"
+        },
+        "statement_evaluations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "statement_index": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "statement_index",
+              "novelty",
+              "explanation"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "theory_id",
+        "novelty",
+        "overall_support",
+        "explanation",
+        "statement_evaluations"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/novelty_assessment.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_evaluations": {
+      "items": {
+        "$ref": "#/$defs/theory_evaluation"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theory_evaluations",
+    "artifacts"
+  ],
+  "title": "novelty_assessment",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
new file mode 100644
index 0000000..2bd4ea8
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
@@ -0,0 +1,163 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "source_access": {
+      "additionalProperties": true,
+      "properties": {
+        "data_availability": {
+          "type": "string"
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "identifier": {
+          "type": "string"
+        },
+        "repository": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "data_availability",
+        "repository",
+        "identifier"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    },
+    "source_access": {
+      "items": {
+        "$ref": "#/$defs/source_access"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "source_access",
+    "artifacts"
+  ],
+  "title": "provenance_extraction",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
new file mode 100644
index 0000000..8a924d9
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
@@ -0,0 +1,107 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_source": {
+      "additionalProperties": true,
+      "properties": {
+        "dataset_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "paper_title": {
+          "type": "string"
+        },
+        "paper_url": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "dataset_id",
+        "paper_id",
+        "paper_title",
+        "paper_url"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_search.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_sources": {
+      "items": {
+        "$ref": "#/$defs/data_source"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "data_sources",
+    "artifacts"
+  ],
+  "title": "provenance_search",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
new file mode 100644
index 0000000..0d43a6f
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
@@ -0,0 +1,230 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "provenance_report": {
+      "additionalProperties": true,
+      "properties": {
+        "acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "not_acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sources": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "dataset_id",
+              "paper_title",
+              "paper_url",
+              "repository",
+              "access_status",
+              "local_path"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "sources",
+        "method_note",
+        "acquired",
+        "not_acquired",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "provenance_report": {
+      "$ref": "#/$defs/provenance_report"
+    }
+  },
+  "required": [
+    "provenance_report",
+    "artifacts"
+  ],
+  "title": "provenance_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd b/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
new file mode 100644
index 0000000..4bb9e6e
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
@@ -0,0 +1,29 @@
+%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+  law_extraction["law_extraction"]
+  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+  subgraph replication["replication (at replan)"]
+    replication__experiment_design["experiment_design<br/>asta experiment"]
+    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__adjudicate["adjudicate"]
+  end
+  class replication replan
+  reproduction_synthesis["reproduction_synthesis"]
+  data_driven_discovery --> law_extraction
+  law_extraction --> evidence_gathering
+  law_extraction --> replication__experiment_design
+  evidence_gathering --> replication__experiment_design
+  replication__experiment_design --> replication__analysis
+  evidence_gathering --> replication__analysis
+  replication__analysis --> replication__audit
+  replication__experiment_design --> replication__adjudicate
+  replication__analysis --> replication__adjudicate
+  replication__audit --> replication__adjudicate
+  law_extraction --> reproduction_synthesis
+  replication --> reproduction_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
new file mode 100644
index 0000000..570e076
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
@@ -0,0 +1,253 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "reproduction_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "laws_ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "outcome",
+              "testability",
+              "effect_size_source",
+              "effect_size_observed",
+              "independence_axes",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_failed_or_untestable": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_held": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "method_note",
+        "laws_ledger",
+        "what_held",
+        "what_failed_or_untestable",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/reproduction_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "reproduction_report": {
+      "$ref": "#/$defs/reproduction_report"
+    }
+  },
+  "required": [
+    "reproduction_report",
+    "artifacts"
+  ],
+  "title": "reproduction_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json b/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
new file mode 100644
index 0000000..8968920
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "testability_triage": {
+      "additionalProperties": true,
+      "properties": {
+        "assessments": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "available_data": {
+                "type": "string"
+              },
+              "gap": {
+                "type": "string"
+              },
+              "proposed_test": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "testable_now",
+              "available_data",
+              "required_data",
+              "proposed_test",
+              "gap"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "testable_theory_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "assessments",
+        "testable_theory_ids"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/testability_triage.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "testability_triage": {
+      "$ref": "#/$defs/testability_triage"
+    }
+  },
+  "required": [
+    "testability_triage",
+    "artifacts"
+  ],
+  "title": "testability_triage",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd b/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
new file mode 100644
index 0000000..59e2d0f
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
@@ -0,0 +1,27 @@
+%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph theory_generation["theory_generation"]
+    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+  end
+  testability_triage["testability_triage"]
+  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+  theory_synthesis["theory_synthesis"]
+  ext__adjudicate(["adjudicate (external)"]):::external
+  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
+  ext__evidence_gathering(["evidence_gathering (external)"]):::external
+  ext__law_extraction(["law_extraction (external)"]):::external
+  ext__law_extraction -.-> evidence_extraction
+  ext__adjudicate -.-> evidence_extraction
+  evidence_extraction --> theory_generation__theory_formation
+  theory_generation --> testability_triage
+  ext__data_driven_discovery -.-> testability_triage
+  ext__evidence_gathering -.-> testability_triage
+  testability_triage --> novelty_assessment
+  theory_generation --> theory_synthesis
+  novelty_assessment --> theory_synthesis
+  testability_triage --> theory_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json b/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
new file mode 100644
index 0000000..7373cec
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
@@ -0,0 +1,240 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory": {
+      "additionalProperties": true,
+      "properties": {
+        "components": {
+          "additionalProperties": true,
+          "properties": {
+            "generation_objective": {
+              "type": "string"
+            },
+            "new_predictions_likely": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "new_predictions_unknown": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "theory_statements": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "statement_name": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statement": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "statement_name",
+                  "theory_statement",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            },
+            "unaccounted_for": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "text": {
+                    "type": "string"
+                  },
+                  "uuids": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "text",
+                  "uuids"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "generation_objective",
+            "theory_statements",
+            "new_predictions_likely",
+            "new_predictions_unknown",
+            "unaccounted_for"
+          ],
+          "type": "object"
+        },
+        "description": {
+          "type": "string"
+        },
+        "grounds_law_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "objective": {
+          "enum": [
+            "accuracy_focused",
+            "novelty_focused"
+          ]
+        },
+        "supporting_evidence_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "theory_query": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "name",
+        "description",
+        "theory_query",
+        "objective",
+        "grounds_law_ids",
+        "supporting_evidence_ids",
+        "components"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theories": {
+      "items": {
+        "$ref": "#/$defs/theory"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theories",
+    "artifacts"
+  ],
+  "title": "theory_formation",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
new file mode 100644
index 0000000..dd2768e
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
@@ -0,0 +1,280 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "new_predictions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "novelty_summary": {
+          "type": "string"
+        },
+        "open_threads": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "theories": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "name": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "name",
+              "objective",
+              "one_line",
+              "grounds_law_ids",
+              "novelty",
+              "testable_now",
+              "supporting_evidence_ids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theories",
+        "novelty_summary",
+        "new_predictions",
+        "open_threads",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_report": {
+      "$ref": "#/$defs/theory_report"
+    }
+  },
+  "required": [
+    "theory_report",
+    "artifacts"
+  ],
+  "title": "theory_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
new file mode 100644
index 0000000..8d1a639
--- /dev/null
+++ b/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
@@ -0,0 +1,232 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "verification_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "novelty_by_verification": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "audit_survived": {
+                "type": "boolean"
+              },
+              "claim": {
+                "type": "string"
+              },
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome",
+              "effect_size",
+              "data_used",
+              "audit_survived"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_could_not_be_tested": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_was_tested": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "novelty_by_verification",
+        "what_was_tested",
+        "what_could_not_be_tested",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/verification_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "verification_report": {
+      "$ref": "#/$defs/verification_report"
+    }
+  },
+  "required": [
+    "verification_report",
+    "artifacts"
+  ],
+  "title": "verification_synthesis",
+  "type": "object"
+}
diff --git a/plugins/asta/skills/research-step/assets/schemas.yaml b/plugins/asta/skills/research-step/assets/schemas.yaml
index b9643b3..b5ead12 100644
--- a/plugins/asta/skills/research-step/assets/schemas.yaml
+++ b/plugins/asta/skills/research-step/assets/schemas.yaml
@@ -1,48 +1,105 @@
-version: 1
+version: 2
+
+config:
+  # Session-tunable knobs and their defaults. A mission.md may override any of
+  # them in a `## Config` section (one `key: value` line each). plan's bootstrap
+  # resolves defaults + mission overrides and pins the result on the epic root
+  # (metadata.research_step.config); execute reads the pinned values from the
+  # epic root and passes them into the chain commands. Names match the field the
+  # consuming agent actually takes.
+  n_experiments: 10                # auto-ds: experiments per discovery run; set in the run-metadata
+                                   # JSON given to `asta autodiscovery metadata` (data_driven_discovery
+                                   # fresh runs, cohort_assembly/discovery_run)
+  max_papers_to_retrieve: 30       # generate-theories find-and-extract: papers to extract from
+                                   # (provenance_extraction, evidence_extraction, hypothesis_formation)
+  max_parallel_dv_runs: 5          # cap on concurrent DataVoyager (analyze-data) submissions when a
+                                   # step fans out runs in parallel (holdout_replication, analysis
+                                   # batches); submit up to this many, then wait before submitting more
 
 enums:
-  outcome:               [held, partial, failed, n/a]
+  outcome:               [held, partial, failed, underpowered, n/a]   # the one verdict vocabulary, for laws, theories, and hypotheses
   testability:           [tested, proxy_only, untestable]
   construct_equivalence: [equivalent, proxy, mismatch]
   feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
   independence_axis:     [region, instrument, method, construct, temporal, population]
   generation_objective:  [accuracy_focused, novelty_focused]
-  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  subject_kind:          [empirical_law, theory, hypothesis]
   novelty:               [established, derivable, genuinely_new]
-  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  support_level:         [supports, mixed, contradicts, inconclusive]
   priority:              [high, medium, low]
   access_status:         [acquired, open_unfetched, restricted, not_found]
-  holdout_verdict:       [held, failed, untested]
 
 types:
 
-  artifact:
-    artifactId: string
+  # Records are immutable: a task emits a record once; later stages never re-emit
+  # it with new values. Verdicts, enrichments, and acquisition results are their
+  # own records referencing the original by id (adjudication -> subject_id,
+  # source_access/acquisition -> data_source_id).
+  #
+  # Agent outputs nest VERBATIM: when a type carries another agent's record
+  # (theory.components, experiment rows, mcts_provenance), the agent's object is
+  # stored unmodified under its key - orchestrator annotations wrap it, never
+  # reach into or rename inside it - so a real agent payload always slots in.
+  # validate-output.sh deep-validates against the compiled JSON Schemas
+  # (assets/compiled/, regenerated by scripts/compile-schemas.py at build time):
+  # top-level output keys are closed, but nested objects stay open, so extra
+  # nested fields from real payloads are always permitted. A field name ending
+  # in `?` (e.g. mcts_provenance?) is optional; unmarked fields are required.
+
+  # --- Artifacts. The `artifacts` key on every task holds A2A 1.0 Artifacts,
+  # exactly as the spec defines them: an artifact is an array of typed `parts`
+  # (wire field names, camelCase). A2A artifacts returned by chain commands are
+  # stored as received; locally produced byproducts (a rendered figure, a script,
+  # a data file) are wrapped in the same shape as file parts. Conventions on top
+  # of the spec:
+  #   - agents tag the artifact kind in metadata.type, e.g. extraction-schema |
+  #     extraction | theory | novelty | theory_store (theorizer) ·
+  #     paper-finder-search-result · widget_data_voyager (DV); local byproducts
+  #     use figure | code | data | log | experiment-design.
+  #   - local files are file parts in the *uri* form, uri = repo-root-relative
+  #     path under .asta/<agent>/<slug>/, with a mimeType (image/png,
+  #     text/x-python, text/csv, text/markdown, ...).
+  #   - never put the *bytes* form in output_json - beads caps metadata at ~64KB;
+  #     base64 payloads from agents (e.g. DV figures) are written to disk first
+  #     and referenced by uri.
+  # Byproducts always travel this channel; a thing the contract *requires*
+  # (e.g. an analysis's figures) is a typed output key.
+
+  artifact:                          # A2A 1.0 Artifact, verbatim
+    artifactId: string               # unique within the task (e.g. UUID, or <issue-id>-<n> for local byproducts)
     name: string
     description: string
-    parts: [object]
-    metadata: object
-
-  experiment:
-    experiment_id: string
-    status: string
-    hypothesis: string
+    parts: [part]
+    metadata?: object                # optional; metadata.type carries the artifact kind
+    extensions?: [string]            # optional; URIs of relevant A2A extensions
+
+  part:                              # A2A Part union, discriminated by `kind`
+    kind: string                     # text | file | data
+    metadata?: object                # optional, per part
+    # text: {kind: text, text: string}
+    # file: {kind: file, file: {uri: string, mimeType: string, name: string}}     - the only file form allowed in output_json
+    #       {kind: file, file: {bytes: base64, mimeType: string, name: string}}   - wire/disk only, never in output_json
+    # data: {kind: data, data: object} - structured payloads, stored as received
+
+  figure:                            # the report-embedding form: image is a repo-root-relative path
+    caption: string                  # (PNG/SVG), embedded via ![caption](path)
+    image: string
+
+  experiment:                        # an auto-ds experiments.json record; these four fields are the
+    experiment_id: string            # required projection - paste the full record in unchanged (extras
+    status: string                   # like experiment_plan, code, review, prior/posterior beliefs are
+    hypothesis: string               # permitted and preserved)
     analysis: string
 
-  empirical_law:
-    id: string
+  empirical_law:                     # identity of a discovered law; its verdict lives in the
+    id: string                       # adjudication that references it, never here
     statement: string
     construct: string
     source_operationalization: string
     source_node: string
-    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    effect_size_source: string       # the effect size as the source run/paper claims it
     grouping_rationale: string
-    outcome: outcome                       
-    testability: testability              
-    independence_axes: [independence_axis]
-    effect_size_source: string
-    effect_size_reproduction: string
-    replication_path: string
+    mcts_provenance?: {surprise: number, is_surprising: boolean, prior_belief: object, posterior_belief: object}   # optional; the auto-ds experiment record's search-signal fields, verbatim
 
   dataset:
     id: string
@@ -53,18 +110,25 @@ types:
     variables: [string]
     covers_laws: [string]
 
-  data_source:                       # links a run dataset to the paper and repository it came from
+  data_source:                       # the paper behind a run dataset; emitted once by provenance_search
     id: string
     dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
     paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
     paper_title: string
     paper_url: string
+
+  source_access:                     # provenance_extraction's enrichment, keyed by data_source id
+    data_source_id: string
     data_availability: string        # the paper's data-availability statement, verbatim or summarized
     repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
     identifier: string               # DOI / accession / direct URL for the data
+
+  acquisition:                       # data_acquisition's result, keyed by data_source id
+    data_source_id: string
     access_status: access_status     # acquired | open_unfetched | restricted | not_found
     local_path: string               # repo-root-relative path once acquired (else empty)
-    covers_laws: [string]
+    dataset_id: string               # the dataset registered from this source (empty if not acquired)
+    validation_note: string          # QC against the paper - n, schema/variables, units, missingness - or why not validated
 
   cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
     id: string
@@ -77,31 +141,46 @@ types:
     holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
     run_id: string                   # the stood-up auto-ds run (autodiscovery create)
 
-  reproduction_design:
-    law_id: string
+  experiment_design:                 # one test, committed before its analysis runs; used by the
+    subject_kind: subject_kind       # replication (law) and testing (hypothesis) branches
+    subject_id: string               # the law / theory / hypothesis under test
     experiment_name: string
     plain_language_description: string
-    original_operationalization: string
+    source_operationalization: string      # how the source measured it (empty for a novel hypothesis)
     independent_operationalization: string
     construct_equivalence: construct_equivalence
     feasibility: feasibility
     required_data: string
     data_gap: string
-
-  analysis:
-    final_answer: string
-    assumptions: [string]
-    figures: [{caption: string, image: string}]
+    experiment_design_query: string  # the natural-language query sent to the experiment designer (input provenance; empty when no designer ran)
+    prespecified:                    # the commitment adjudicate checks the result against
+      test: string                   # the statistical test / model
+      metric: string                 # the quantity that decides it
+      success_threshold: string      # what counts as held, incl. direction; note expected power / min detectable effect if known
+
+  analysis:                          # DataVoyager's TaskSummary, verbatim (figures are hoisted to the
+    final_answer: string             # task's `figures` output key after imageb64 -> PNG conversion)
+    assumptions: string              # a single text block, as the agent emits it
     code: string
 
   audit_report:
-    subject_id: string                     
-    analysis_id: string
-    challenges: [{concern: string, check: string, outcome: string}]
+    subject_id: string               # the law / theory / hypothesis whose analysis was audited
+    challenges: [{concern: string, check: string, outcome: string}]   # include one negative-control check (e.g. shuffled predictor)
     artifacts_found: [string]
     verdict_survives: boolean
     recommended_adjustment: string
 
+  adjudication:                      # the verdict record; references its subject, never mutates it
+    subject_kind: subject_kind
+    subject_id: string
+    outcome: outcome                 # held | partial | failed | underpowered | n/a
+    testability: testability
+    effect_size_observed: string
+    prespecified_check: string       # the observed metric vs the committed success_threshold
+    independence_axes: [independence_axis]
+    data_used: string
+    evidence: string
+
   extracted_data:
     id: string
     run_id: string
@@ -114,15 +193,29 @@ types:
         citation_title: string
         uuid: string
 
+  literature_review:                 # hypothesis_driven_research's survey output
+    summary: string
+    key_findings: [{text: string, uuids: [string]}]
+    open_gaps: [string]              # gaps that motivate hypotheses
+    citations: [{id: string, corpus_id: number, title: string, url: string, relevance: string}]   # corpus_id = canonical S2 corpusId; rows convert mechanically to PaperEntry seeds
+
+  hypothesis:                        # a slim, directly testable claim (hypothesis_driven_research)
+    id: string
+    statement: string
+    rationale: string                # why the literature implies it
+    falsifiable_prediction: string
+    grounds: [{text: string, uuids: [string]}]   # the evidence the rationale rests on
+
   theory:
     id: string
     name: string
     description: string
     theory_query: string
-    objective: generation_objective
-    grounds_law_ids: [string]
-    supporting_evidence_ids: [string]
-    components:
+    objective: generation_objective  # orchestrator annotation (the generation branch); the agent's own copy is components.generation_objective
+    grounds_law_ids: [string]        # orchestrator annotation - which laws ground this theory (no agent equivalent)
+    supporting_evidence_ids: [string]   # orchestrator annotation
+    components:                      # the theorizer's theory record, carried VERBATIM - never flatten or edit
+      generation_objective: string   # the agent's value as emitted (e.g. accuracy-focused)
       theory_statements:
         - statement_name: string
           theory_statement: string
@@ -138,28 +231,24 @@ types:
         testable_now: boolean
         available_data: string
         required_data: string
-        proposed_test: string
+        proposed_test: {test: string, metric: string, success_threshold: string}   # prespecified; the verification branch's adjudicate checks against it
         gap: string
     testable_theory_ids: [string]
 
   theory_evaluation:
     id: string
     theory_id: string
-    novelty: novelty
-    overall_support_or_contradict: string
-    overall_support_or_contradict_explanation: string
-
-  verification:
-    theory_id: string
-    prediction: string
-    verdict: verification_verdict
-    effect_size: string
-    data_used: string
-    audit_survived: boolean
-    analysis_id: string
+    novelty: novelty                 # rollup across statement_evaluations - the most novel statement wins
+    overall_support: support_level
+    overall_support_raw?: string     # the agent's untyped judgment, verbatim (optional)
+    explanation: string
+    statement_evaluations:           # the agent's real granularity - novelty is scored per statement
+      - statement_index: number
+        novelty: novelty
+        explanation: string
 
   next_run_proposal:
-    kind: next_step_kind
+    kind: string                     # any flows: or tasks: key in this file
     title: string
     tests: [string]
     data_needed: string
@@ -167,11 +256,12 @@ types:
     priority: priority
 
   # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
-  # theory_report, verification_report), one standalone data-gaps report, and a
-  # theory-led master (research_report). Each carries report_path (the .md deliverable
-  # written first), a title, a one-line headline, a typed body, and `links` back to the
-  # artifacts, tasks, and papers it rests on. Each sub-flow report exposes a local
-  # `gaps` list that gap_synthesis aggregates into the data_gaps_report.
+  # theory_report, verification_report, hypothesis_report, discovery_report), one
+  # standalone data-gaps report, and a theory-led master (research_report). Each
+  # carries report_path (the .md deliverable written first), a title, a one-line
+  # headline, a typed body, and `links` back to the artifacts, tasks, and papers it
+  # rests on. Each sub-flow report exposes a local `gaps` list that gap_synthesis
+  # aggregates into the data_gaps_report.
 
   provenance_report:
     report_path: string
@@ -184,8 +274,10 @@ types:
         repository: string
         access_status: access_status
         local_path: string
+    method_note: string              # how sources were matched and the data merged/validated (e.g. join key, resulting n vs the run's n)
     acquired: [string]
     not_acquired: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -200,11 +292,12 @@ types:
         outcome: outcome
         testability: testability
         effect_size_source: string
-        effect_size_reproduction: string
+        effect_size_observed: string
         independence_axes: [independence_axis]
         evidence: string
     what_held: [string]
     what_failed_or_untestable: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -225,6 +318,7 @@ types:
     novelty_summary: string
     new_predictions: [string]
     open_threads: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -236,12 +330,30 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verdict: verification_verdict
+        outcome: outcome
         effect_size: string
         data_used: string
         audit_survived: boolean
     what_was_tested: string
     what_could_not_be_tested: [string]
+    figures: [figure]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  hypothesis_report:                 # synthesis output of the hypothesis_driven_research flow
+    report_path: string
+    title: string
+    headline: string
+    question: string                 # the research question from mission.md
+    ledger:
+      - hypothesis_id: string
+        statement: string
+        outcome: outcome
+        effect_size_observed: string
+        evidence: string
+    answer: string                   # what the verdicts say about the question
+    open_questions: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -256,6 +368,7 @@ types:
         severity: priority
         arose_in: string
     next_steps: [next_run_proposal]
+    figures: [figure]
     links: [{label: string, ref: string}]
 
   research_report:
@@ -267,55 +380,76 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verification: verification_verdict
+        outcome: outcome
     inference_chain: [{claim: string, chain: [string]}]
     what_was_done: [string]
     sub_reports: [{kind: string, report_path: string, one_line: string}]
     tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+    figures: [figure]                # at least the one decisive figure, embedded in the report
+    links: [{label: string, ref: string}]
 
   discovery_report:                  # synthesis output of the auto_discovery flow
     report_path: string
     title: string
     headline: string
+    run_id: string                   # the discovery run, with its cohort sizes in the report header
     laws:
       - law_id: string
         statement: string
         surprise: number             # the discovery run's surprise signal for this candidate law
-        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        outcome: outcome             # from the held-out replication (untested branches are n/a)
         deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
-        effect_size: string
+        effect_size_discovery: string   # on the discovery subset
+        effect_size_holdout: string     # on the held-out subset - the pair shows replication shrinkage
+    interpretation: string           # what the run means against the question that motivated it
     next_steps: [next_run_proposal]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
+# Tasks are pure output contracts: output maps each output_json key to its type,
+# [type] meaning a JSON array of that type. Every task also carries artifacts.
+# A task's inputs are declared per flow step (the same output shape takes
+# different inputs in different flows), under `input:` in the flows below.
+
 tasks:
-  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
-  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
-  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
-  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
-  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
-  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
-  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
-  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
-  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
-  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
-  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
-  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
-  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
-  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
-  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
-  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
-  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
-  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
-  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
-  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
-  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
-  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
-  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
-  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
-  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
-  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
-  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+  provenance_search:      {output: {data_sources: [data_source], artifacts: [artifact]}}
+  provenance_extraction:  {output: {extracted_data: extracted_data, source_access: [source_access], artifacts: [artifact]}}
+  data_acquisition:       {output: {acquisitions: [acquisition], datasets: [dataset], artifacts: [artifact]}}
+  provenance_synthesis:   {output: {provenance_report: provenance_report, artifacts: [artifact]}}
+  data_driven_discovery:  {output: {experiments: [experiment], datasets: [dataset], artifacts: [artifact]}}
+  law_extraction:         {output: {empirical_laws: [empirical_law], artifacts: [artifact]}}
+  evidence_gathering:     {output: {datasets: [dataset], artifacts: [artifact]}}
+  experiment_design:      {output: {experiment_design: experiment_design, artifacts: [artifact]}}
+  analysis:               {output: {analysis: analysis, figures: [figure], artifacts: [artifact]}}
+  audit:                  {output: {audit_report: audit_report, artifacts: [artifact]}}
+  adjudicate:             {output: {adjudication: adjudication, artifacts: [artifact]}}
+  reproduction_synthesis: {output: {reproduction_report: reproduction_report, artifacts: [artifact]}}
+  evidence_extraction:    {output: {extracted_data: extracted_data, artifacts: [artifact]}}
+  theory_formation:       {output: {theories: [theory], artifacts: [artifact]}}
+  testability_triage:     {output: {testability_triage: testability_triage, artifacts: [artifact]}}
+  novelty_assessment:     {output: {theory_evaluations: [theory_evaluation], artifacts: [artifact]}}
+  theory_synthesis:       {output: {theory_report: theory_report, artifacts: [artifact]}}
+  verification_synthesis: {output: {verification_report: verification_report, artifacts: [artifact]}}
+  gap_synthesis:          {output: {data_gaps_report: data_gaps_report, artifacts: [artifact]}}
+  final_synthesis:        {output: {research_report: research_report, artifacts: [artifact]}}
+  # hypothesis_driven_research flow
+  literature_review:      {output: {literature_review: literature_review, artifacts: [artifact]}}
+  hypothesis_formation:   {output: {hypotheses: [hypothesis], artifacts: [artifact]}}
+  hypothesis_synthesis:   {output: {hypothesis_report: hypothesis_report, artifacts: [artifact]}}
+  # auto_discovery flow (its own session in a separate workspace: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {output: {cohort: cohort, datasets: [dataset], artifacts: [artifact]}}
+  discovery_run:          {output: {experiments: [experiment], empirical_laws: [empirical_law], artifacts: [artifact]}}
+  holdout_replication:    {output: {adjudication: adjudication, figures: [figure], artifacts: [artifact]}}
+  discovery_synthesis:    {output: {discovery_report: discovery_report, artifacts: [artifact]}}
+
+# Each flow step carries: mission (what the work is), input (the upstream steps
+# in this session whose issues the plan workflow wires in as the task's inputs),
+# and chain (the asta commands). A node with a chain is a step; a node with only
+# child nodes and a mission is a group; a chain item {workflow: <flow>, mission:
+# <text>} expands the named sub-flow inline. A group whose branches are created
+# at replan (one per law / theory / hypothesis, once the naming step closes)
+# declares `replan: true`.
 
 flows:
 
@@ -334,103 +468,171 @@ flows:
       chain:
         - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.}
     verification:
-      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.
+      replan: true
       analysis:
-        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        mission: Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [testability_triage, data_driven_discovery, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_audit:
-        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_verification:
-        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+      adjudicate:
+        mission: Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.
+        input: [testability_triage, analysis, audit]
         chain: []
     verification_synthesis:
-      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.
+      input: [verification, novelty_assessment]
       chain: []
     gap_synthesis:
-      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis]
       chain: []
     final_synthesis:
-      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis, gap_synthesis]
       chain: []
 
   data_provenance:
     mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
     provenance_search:
-      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).
+      input: []
       chain: [asta literature find, asta papers search]
     provenance_extraction:
-      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.
+      input: [provenance_search]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     data_acquisition:
-      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      mission: For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.
+      input: [provenance_search, provenance_extraction]
       chain: [asta documents, asta autodiscovery upload]
     provenance_synthesis:
-      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.
+      input: [provenance_search, provenance_extraction, data_acquisition]
       chain: []
 
   reproduction:
-    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch analysis, not the ingested run.
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.
     data_driven_discovery:
-      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      input: []
       chain: [asta autodiscovery run, asta autodiscovery experiments]
     law_extraction:
-      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.
+      input: [data_driven_discovery]
       chain: []
     evidence_gathering:
-      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.
+      input: [law_extraction]
       chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
     replication:
       mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
-      reproduction_design:
-        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+      replan: true
+      experiment_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.
+        input: [law_extraction, evidence_gathering]
         chain: [asta experiment]
       analysis:
-        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        mission: Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduction_audit:
-        mission: Try to refute the analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduce:
-        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+      adjudicate:
+        mission: Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.
+        input: [experiment_design, analysis, audit]
         chain: []
     reproduction_synthesis:
-      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      input: [law_extraction, replication]
       chain: []
 
   theorizer:
     mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
     evidence_extraction:
-      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      mission: Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      input: [law_extraction, adjudicate]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     theory_generation:
       mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
       theory_formation:
         mission: Form theories from the shared extraction store under this branch's objective.
+        input: [evidence_extraction]
         chain: [asta generate-theories form-theory]
     testability_triage:
-      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.
+      input: [theory_generation, data_driven_discovery, evidence_gathering]
       chain: []
     novelty_assessment:
       mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      input: [testability_triage]
       chain: [asta generate-theories evaluate-novelty]
     theory_synthesis:
       mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      input: [theory_generation, novelty_assessment, testability_triage]
+      chain: []
+
+  hypothesis_driven_research:
+    mission: Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.
+    literature_review:
+      mission: Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.
+      input: []
+      chain: [asta literature find, asta papers search]
+    hypothesis_formation:
+      mission: Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.
+      input: [literature_review]
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    testing:
+      mission: One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.
+      replan: true
+      experiment_design:
+        mission: Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.
+        input: [hypothesis_formation, literature_review]
+        chain: [asta experiment]
+      data_acquisition:
+        mission: Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.
+        input: [experiment_design]
+        chain: [asta documents, asta autodiscovery upload]
+      analysis:
+        mission: Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, data_acquisition]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      adjudicate:
+        mission: Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.
+        input: [experiment_design, analysis, audit]
+        chain: []
+    hypothesis_synthesis:
+      mission: Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.
+      input: [hypothesis_formation, testing]
       chain: []
 
   auto_discovery:
-    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
     cohort_assembly:
-      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      input: []
       chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
     discovery_run:
-      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      mission: Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      input: [cohort_assembly]
       chain: [asta autodiscovery submit, asta autodiscovery experiments]
     replication:
       mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      replan: true
       holdout_replication:
-        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [discovery_run, cohort_assembly]
         chain: [asta analyze-data submit, asta analyze-data poll]
     discovery_synthesis:
-      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      mission: Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.
+      input: [discovery_run, replication]
       chain: []
diff --git a/plugins/asta/skills/research-step/scripts/close-task.sh b/plugins/asta/skills/research-step/scripts/close-task.sh
index 673b23f..7535a38 100755
--- a/plugins/asta/skills/research-step/scripts/close-task.sh
+++ b/plugins/asta/skills/research-step/scripts/close-task.sh
@@ -16,7 +16,8 @@ jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2;
 cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
 merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
   '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
-tmp="$(mktemp)"; printf '%s' "$merged" > "$tmp"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$merged" > "$tmp"
 bd update "$id" --metadata @"$tmp" >/dev/null
 
 # 2. validate structurally (reads the issue back; no style lint)
@@ -28,17 +29,25 @@ bd close "$id" >/dev/null
   || { echo "close-task: $id did not close" >&2; exit 2; }
 echo "closed $id"
 
-# 5. cascade: close each ancestor group whose direct children are all closed
+# 5. cascade: close each ancestor group whose direct children are all closed.
+# The epic root is never closed here — "root open, no open tasks" is the
+# session-complete state that epic-root.sh and the workflows rely on.
 cur_id="$id"
 while [[ "$cur_id" == *.* ]]; do
   parent="${cur_id%.*}"
-  bd show "$parent" --json >/dev/null 2>&1 || break
-  open_kids="$(bd list --json | jq --arg p "$parent" '
+  parent_json="$(bd show "$parent" --json 2>/dev/null)" || break
+  [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break
+  open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" '
     [ .[]
       | select(.id | startswith($p + "."))
       | select((.id[($p|length)+1:] | contains(".")) | not)
       | select(.status != "closed") ] | length')"
   [[ "$open_kids" -eq 0 ]] || break
-  bd close "$parent" >/dev/null 2>&1 && echo "closed group $parent"
+  if bd close "$parent" >/dev/null 2>&1; then
+    echo "closed group $parent"
+  else
+    echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2
+    break
+  fi
   cur_id="$parent"
 done
diff --git a/plugins/asta/skills/research-step/scripts/create-task.sh b/plugins/asta/skills/research-step/scripts/create-task.sh
index 6024cf6..1e992a9 100755
--- a/plugins/asta/skills/research-step/scripts/create-task.sh
+++ b/plugins/asta/skills/research-step/scripts/create-task.sh
@@ -5,16 +5,14 @@
 # execute publishes them via close-task.sh. Prints the new issue id.
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
 parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
 
-python3 - "$schemas" "$task_type" <<'PY' || { echo "create-task: unknown task_type '$2' (not in schemas.yaml)" >&2; exit 3; }
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
-PY
+# Validate the task_type against schemas.yaml. The helper exits 3 for an
+# unknown task_type (and prints the known ones) or 5 when the schema cannot
+# be read (e.g. PyYAML missing — run init); set -e propagates either.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 [[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
 [[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
@@ -22,6 +20,7 @@ PY
 
 if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
 meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
-  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
-tmp="$(mktemp)"; printf '%s' "$meta" > "$tmp"
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$meta" > "$tmp"
 bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/plugins/asta/skills/research-step/scripts/epic-root.sh b/plugins/asta/skills/research-step/scripts/epic-root.sh
index 13a7dfd..c176ef0 100755
--- a/plugins/asta/skills/research-step/scripts/epic-root.sh
+++ b/plugins/asta/skills/research-step/scripts/epic-root.sh
@@ -33,7 +33,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-ids=$(bd list --json | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
+ids=$(bd list --json --limit 0 | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
 count=$(printf '%s' "$ids" | grep -c . || true)
 
 case "$count" in
diff --git a/plugins/asta/skills/research-step/scripts/next-task.sh b/plugins/asta/skills/research-step/scripts/next-task.sh
new file mode 100755
index 0000000..97e3592
--- /dev/null
+++ b/plugins/asta/skills/research-step/scripts/next-task.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# next-task.sh — the one authoritative definition of task order. Prints every
+# open task issue (status == open, metadata.research_step.task_type set),
+# ordered *numerically* along the hierarchical id, so wf.1.2 comes before
+# wf.1.10 (a plain lexical sort breaks past 9 siblings). Groups have no
+# task_type and are never listed; with no dependency edges, id order is the signal.
+#
+# Shared by execute (picks the next task) and update-summary (renders the
+# queue) so the two always agree on what runs next.
+# Output (stdout, key: value lines):
+#   next:  <bd-id> | none
+#   queue: <space-separated bd-ids>   (omitted when empty)
+# Exit: 0 (even when next: none) · 3 bd/jq missing
+set -euo pipefail
+
+for tool in bd jq; do
+  command -v "$tool" >/dev/null 2>&1 || { echo "next-task: '$tool' not found on PATH" >&2; exit 3; }
+done
+
+ordered="$(bd list --json --limit 0 | jq -r '
+  map(select(.status == "open") | select(.metadata.research_step.task_type != null))
+  | sort_by(.id | split(".") | map(tonumber? // .))
+  | .[].id')"
+
+if [[ -z "$ordered" ]]; then
+  echo "next: none"
+  exit 0
+fi
+
+first=${ordered%%$'\n'*}
+echo "next: $first"
+if [[ "$ordered" == *$'\n'* ]]; then
+  rest=${ordered#*$'\n'}; rest=${rest//$'\n'/ }; echo "queue: $rest"
+fi
diff --git a/plugins/asta/skills/research-step/scripts/summary-check.sh b/plugins/asta/skills/research-step/scripts/summary-check.sh
index 8d98b65..6a14470 100755
--- a/plugins/asta/skills/research-step/scripts/summary-check.sh
+++ b/plugins/asta/skills/research-step/scripts/summary-check.sh
@@ -30,7 +30,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-current=$(bd list --json \
+current=$(bd list --json --limit 0 \
   | jq -r '.[] | select(.status != "closed") | .id' \
   | sort \
   | shasum -a 256 \
diff --git a/plugins/asta/skills/research-step/scripts/task-output-keys.sh b/plugins/asta/skills/research-step/scripts/task-output-keys.sh
new file mode 100755
index 0000000..ef1269b
--- /dev/null
+++ b/plugins/asta/skills/research-step/scripts/task-output-keys.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# task-output-keys.sh <task_type> — print the space-separated output keys for a
+# task from assets/schemas.yaml. The single schema reader for scripts:
+# create-task.sh and validate-output.sh both call it (discarding the printed
+# keys) to confirm a task_type exists in schemas.yaml before going further.
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema
+#       (python3/PyYAML missing or schemas.yaml unreadable — run init)
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; }
+
+python3 - "$schemas" "$1" <<'PY'
+import sys
+
+try:
+    import yaml
+except ImportError:
+    print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+try:
+    with open(sys.argv[1], encoding="utf-8") as f:  # YAML is UTF-8; do not depend on the locale
+        d = yaml.safe_load(f)
+except Exception as e:
+    print(f"task-output-keys: cannot read {sys.argv[1]}: {e}", file=sys.stderr)
+    sys.exit(5)
+
+# An empty schemas.yaml loads as None (not a dict); treat any non-mapping as "no tasks", not a traceback.
+tasks = (d.get("tasks") if isinstance(d, dict) else None) or {}
+t = tasks.get(sys.argv[2])
+if t is None:
+    print(f"task-output-keys: unknown task_type '{sys.argv[2]}'", file=sys.stderr)
+    print(f"task-output-keys: known: {' '.join(sorted(tasks))}", file=sys.stderr)
+    sys.exit(3)
+print(" ".join(t["output"]))
+PY
diff --git a/plugins/asta/skills/research-step/scripts/validate-output.sh b/plugins/asta/skills/research-step/scripts/validate-output.sh
index af3b8f6..69530f9 100755
--- a/plugins/asta/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta/skills/research-step/scripts/validate-output.sh
@@ -1,12 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh <issue-id> — structural check of a task's stored output_json.
-# Reads the issue from beads, compiles assets/schemas.yaml, and checks that
-# metadata.research_step.output_json holds exactly tasks.<task_type>.output (incl. artifacts).
-# No style or quality linting.
-# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
+# Reads the issue from beads and deep-validates metadata.research_step.output_json
+# against the compiled JSON Schema (assets/compiled/<task_type>.schema.json,
+# regenerated from schemas.yaml by scripts/compile-schemas.py at build time):
+# top-level keys closed, declared nested fields required, extra nested fields
+# permitted (payloads nest verbatim). No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task
+#       · 4 schema violation
+#       · 5 schema unreadable (PyYAML/jsonschema missing or compiled schema
+#         absent — run the init workflow, or update the plugin)
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
 id="$1"
@@ -16,28 +20,46 @@ rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // e
 task_type="$(jq -r '.task_type // empty' <<<"$rs")"
 [[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
 
-expected="$(python3 - "$schemas" "$task_type" <<'PY'
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-t = d["tasks"].get(sys.argv[2])
-if t is None: sys.exit(3)
-print(" ".join(t["output"]))
-PY
-)" || { echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+# Exits 3 (unknown task_type) or 5 (schema unreadable) with its own message.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 got="$(jq -c '.output_json // empty' <<<"$rs")"
 [[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
 
-for k in $expected; do
-  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
-    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
-done
-while IFS= read -r k; do
-  case " $expected " in *" $k "*) ;; *)
-    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
-  esac
-done < <(jq -r 'keys[]' <<<"$got")
-jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
-  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }
+schema="$here/../assets/compiled/${task_type}.schema.json"
+[[ -r "$schema" ]] || {
+  echo "validate-output: compiled schema missing for '$task_type' ($schema) — update the plugin (it is regenerated at build time)" >&2
+  exit 5
+}
+OUTPUT_JSON="$got" python3 - "$schema" "$task_type" <<'PY'
+import json
+import os
+import sys
+
+try:
+    import jsonschema
+except ImportError:
+    print("validate-output: python3 cannot import jsonschema - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+schema_path, task_type = sys.argv[1:3]
+with open(schema_path) as fh:
+    compiled = json.load(fh)
+payload = json.loads(os.environ["OUTPUT_JSON"])
+
+# Collect every violation, ordered by its path inside output_json, and report the first five.
+found = list(jsonschema.Draft202012Validator(compiled).iter_errors(payload))
+found.sort(key=lambda err: [str(seg) for seg in err.absolute_path])
+for err in found[:5]:
+    dotted = ".".join(map(str, err.absolute_path))
+    where = "output_json." + dotted if dotted else "output_json"
+    hint = " - byproducts go in artifacts" if err.validator == "additionalProperties" and not dotted else ""
+    print(f"validate-output: {where}: {err.message}{hint}", file=sys.stderr)
+if len(found) > 5:
+    print(f"validate-output: ... and {len(found) - 5} more schema violation(s)", file=sys.stderr)
+if found:
+    print(f"validate-output: output_json does not satisfy the '{task_type}' schema", file=sys.stderr)
+    sys.exit(4)
+PY
 
 echo "ok"
diff --git a/plugins/asta/skills/research-step/workflows/brainstorm.md b/plugins/asta/skills/research-step/workflows/brainstorm.md
index 250ba36..6a9bbf6 100644
--- a/plugins/asta/skills/research-step/workflows/brainstorm.md
+++ b/plugins/asta/skills/research-step/workflows/brainstorm.md
@@ -25,27 +25,27 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one. Record the chosen flow(s) in `mission.md` so `plan` can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)**: open `assets/schemas.yaml` and enumerate the keys under `flows:` — do **not** offer flows from memory; the file is the only source of the list, and each flow's purpose is in its `mission` field. A custom chain of `tasks:` entries is also an option. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so `plan` can read them. Also surface the session **config knobs** (the `config:` section of `assets/schemas.yaml`, e.g. `n_experiments`, `max_papers_to_retrieve`) with their defaults; record any non-default choices in a `## Config` section of `mission.md` (one `key: value` line each) — `plan` pins the resolved config on the epic at bootstrap. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
-- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / ready counts plus the single most-relevant ready task) and ask what they want to do next.
+- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / open-task counts plus the next task from `scripts/next-task.sh`) and ask what they want to do next.
 
 ### 3. Answer questions, preferring `summary.md`
 
-`summary.md` is the synthesized view of the session — mission, scope, definitions, related work, hypotheses, results, open questions, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
+`summary.md` is the synthesized view of the session — mission, flow(s), results so far (report headlines), gaps, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
 
-**Default path: read `summary.md`.** For most questions ("what's the current scope?", "which hypotheses are open?", "what's blocking progress?", "what's the state of H2?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
+**Default path: read `summary.md`.** For most questions ("which laws held?", "what theories came out?", "what's blocking progress?", "what's next?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
 
 **Drop down to beads only when the digest doesn't have the answer.** `summary.md` summarizes; some questions need the raw outputs:
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
 | Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
-| Task tree | `bd list --json` — ids encode the parent-child outline |
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
-| Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
+| Full issue metadata (rare; usually the digest covers it) | `bd list --all --limit 0` |
+| Task tree | `bd list --json --all --limit 0` — ids encode the parent-child outline |
+| Long-form content behind a report | follow `report_path` (or any `_path` field) from the issue's `output_json` |
+| Exact verdict / effect size for a law, theory, or hypothesis | `bd show <adjudicate-id> --json` (the adjudication record; the digest reports headlines, not the numbers) |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
 
diff --git a/plugins/asta/skills/research-step/workflows/execute.md b/plugins/asta/skills/research-step/workflows/execute.md
index a8596e2..b4ba1ef 100644
--- a/plugins/asta/skills/research-step/workflows/execute.md
+++ b/plugins/asta/skills/research-step/workflows/execute.md
@@ -9,23 +9,33 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else pick the **open issue that has a `task_type` and the smallest hierarchical id** — `bd list --json`, keep `status == open` with `metadata.research_step.task_type != null`, sort by id, take the first. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
-2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
-   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
-   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
-6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
-7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+1. **Pick a task.** If a task ID was supplied, use it. Else run `scripts/next-task.sh` and take the `next:` id — it is the single definition of ordering (open issues with a `task_type`, numerically sorted by hierarchical id; `update-summary` renders the same order). `next: none` ⇒ report that and stop. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
+2. **Check readiness.** For every issue id in this task's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), verify it is `closed` with a non-null `output_json`. If any input is not ready, **stop and report it** — the graph was built out of order (a task left `in_progress`, or a replan misordering); do not improvise the missing input. This is the readiness check that dependency edges used to provide.
+3. **Claim it.** `bd update <id> --status=in_progress`.
+4. **Load the schema and config.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>.output` (a mapping of key → type; `[type]` means a JSON array of that type); find the step inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.experiment_design`) — and use its `mission`, `input`, and `chain`. Read the **session config** pinned on the epic root (`bd show <epic-id> --json | jq '.[0].metadata.research_step.config'`) and pass its values into the chain where they apply — `n_experiments` into the run-metadata JSON for `asta autodiscovery metadata`, `max_papers_to_retrieve` on `asta generate-theories find-and-extract`. When a pin exists, do not re-read defaults from `schemas.yaml` mid-session; the pin is the truth. (Exception: sessions bootstrapped before config pinning was introduced have no pin — for those, fall back to the `schemas.yaml` defaults.)
+5. **Gather inputs.** For every issue listed in this issue's `inputs`, read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+6. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`), and nothing else. Fill every typed field the schema declares (including typed verdicts like `adjudication.outcome` or `audit_report.verdict_survives`); only values with **no typed field** (an execution id, intermediate file paths, raw tool output) go in `artifacts`. Artifact rows are **A2A 1.0 Artifacts** — `{artifactId, name, description, parts, metadata}`, where `parts` is an array of text / file / data parts (see `artifact` and `part` in the schema). Artifacts returned by chain commands are stored as received (their kind in `metadata.type`); locally produced byproducts (a figure, a script, a data file) are wrapped as file parts in the uri form — repo-root-relative path plus mimeType — never the bytes form (beads' ~64KB cap). Records are immutable — emit verdicts and enrichments as their own records referencing the original by id (`adjudication.subject_id`, `source_access.data_source_id`); never re-emit an upstream record with changed values. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key, following the **Report conventions** below (entity hyperlinks, tables, figures). This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+7. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal the keys of `tasks.<task_type>.output` — which always include `artifacts` — none null; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed (it never closes the epic root — the session-complete state is root open with no open tasks). A non-zero exit **before** the `closed <id>` line means the issue is still `in_progress` — fix and re-run. A warning **after** `closed <id>` means the task closed but a group could not be auto-closed; close that group manually. The `description` is untouched; it stays the brief one-liner set at creation.
+8. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+
+## Report conventions
+
+These apply to every `output_markdown` and to every `*_synthesis` report deliverable. Rigorous but not over the top: a report stays roughly 50–100 lines; the detail behind it lives in artifacts it links to.
+
+- **Every named entity is a hyperlink.** Papers → DOI or canonical Semantic Scholar URL; datasets and result files → relative path; runs/experiments → their artifact or metadata file; laws/theories/hypotheses → their ledger row, written with an anchor (`<a id="l1"></a>`) so other reports can deep-link (`reproduction_report.md#l1`). A named thing with no link is a defect.
+- **Tables are the spine.** Any ledger, matrix, or catalog (laws × outcomes, theories × verdicts, sources × access) is a table with one row per record, mirroring the typed rows in `output_json`.
+- **Figures carry the quantitative claims.** Embed each one (`![caption](path)`) where the claim is made and list it in the `figures` output field. Analysis-type tasks must emit at least one figure; synthesis reports embed the figures their headline rests on (effect-size comparisons, verdict panels, discovery-vs-holdout shrinkage).
+- Neutral, third-person register; numbers in the text match the tables they summarize.
 
 ## Notes on output
 
-The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`).
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`). `output_json.artifacts` holds A2A Artifacts whose file parts reference those paths by uri; heavy payloads (base64 bytes, raw agent JSON) stay on disk, never inline.
 
 Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
+- `report_path` (from every `*_synthesis` report) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `hypothesis_report.md`, `data_gaps_report.md`).
 
 If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
diff --git a/plugins/asta/skills/research-step/workflows/init.md b/plugins/asta/skills/research-step/workflows/init.md
index fd11be3..408c60f 100644
--- a/plugins/asta/skills/research-step/workflows/init.md
+++ b/plugins/asta/skills/research-step/workflows/init.md
@@ -1,6 +1,6 @@
 # Workflow: init
 
-Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
+Bootstrap the environment for a research session: install `bd`, `jq`, PyYAML, and jsonschema, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
 After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
@@ -32,12 +32,16 @@ Server mode (`bd init --server`) is out of scope: it requires running a Dolt sql
    - If no Dolt refs exist on the remote, surface the situation to the user with three options: (a) `bd import .beads/issues.jsonl` (fast, but discards Dolt history and any state newer than the export), (b) configure a Dolt remote and `bd dolt push` from another machine that has the live DB, then retry, (c) abort.
    - Pick one path only after explicit user confirmation. Never auto-import.
 
-4. **Verify the staleness check works.**
+4. **Ensure `python3` can import `yaml` (PyYAML) and `jsonschema`.** `scripts/task-output-keys.sh` (used by `create-task.sh` and `validate-output.sh`) parses `assets/schemas.yaml` with PyYAML; `validate-output.sh` deep-validates each task's `output_json` against the compiled schemas in `assets/compiled/` with jsonschema, and hard-fails (exit 5) without it.
+   - Probe with `python3 -c 'import yaml, jsonschema'`. If it succeeds, skip.
+   - Otherwise install what's missing: `python3 -m pip install --user pyyaml jsonschema` (or the platform equivalent, e.g. `apt-get install python3-yaml python3-jsonschema`). Re-probe; if it still fails, abort and ask the user.
+
+5. **Verify the staleness check works.**
    - Run `scripts/summary-check.sh`. It hashes the sorted IDs of currently-open issues and compares against `summary.md`'s frontmatter. Backend-agnostic — beads can use whichever storage it likes.
    - Requires `jq` on PATH; if missing, install it (`brew install jq`, `apt-get install jq`, etc.) and retry.
    - At init time `summary.md` does not yet exist, so the script will print `status: missing` and exit 1 — that's fine; **update-summary** will create the file later. `status: no-tools` (exit 3) means abort and ask the user.
 
-5. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
+6. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
 
 ## Cross-machine transfer
 
diff --git a/plugins/asta/skills/research-step/workflows/plan.md b/plugins/asta/skills/research-step/workflows/plan.md
index a000e2d..444ee90 100644
--- a/plugins/asta/skills/research-step/workflows/plan.md
+++ b/plugins/asta/skills/research-step/workflows/plan.md
@@ -23,10 +23,10 @@ The flow in `assets/schemas.yaml` is an indented outline, and the beads graph yo
 
 Reading a flow node:
 
-- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
-- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`. Its `input:` names the upstream steps in this session whose issues you wire as the task's `inputs` (the same task type takes different inputs in different flows, so inputs live on the step, not the task).
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission`, `input`, and `chain` are never nodes.
 - A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
-- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`, `testing`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
 The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks):
 
@@ -38,10 +38,10 @@ wf                      [epic]    <mission>
   wf.1.3                          evidence_gathering
   wf.1.4                [fan-out] replication            one branch per law
    wf.1.4.1             [branch]  <law>
-    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.1                    experiment_design
     wf.1.4.1.2                    analysis
-    wf.1.4.1.3                    reproduction_audit
-    wf.1.4.1.4                    reproduce
+    wf.1.4.1.3                    audit
+    wf.1.4.1.4                    adjudicate
    wf.1.4.2             [branch]  <law> …
   wf.1.5                          reproduction_synthesis
 ```
@@ -50,35 +50,37 @@ The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproductio
 
 ## Ordering and closing (no edges)
 
-- **Next task = the open issue with a `task_type` and the smallest id.** Groups (no `task_type`) are never executed.
+- **Next task = the `next:` line of `scripts/next-task.sh`** (open issues with a `task_type`, **numerically** sorted by hierarchical id — `wf.1.2` before `wf.1.10`). Groups (no `task_type`) are never executed. `execute` and `update-summary` both use this script, so they never disagree about what runs next.
 - Because you create in execution order, sequential steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
-- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
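+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. It never closes the **epic root**: "root open, no open tasks" is the session-complete state. Never close groups by hand — the one exception is recovery, when `close-task.sh` warns after its `closed <id>` line that it could not auto-close a group (see `execute.md`, "Finish with `close-task.sh`").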
 
 ## Static vs data-dependent fan-outs
 
 - **Static** (`theory_generation` by objective): both branches are known up front → create them together.
-- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+- **Data-dependent** (`replication` per law, `verification` per testable theory, `testing` per hypothesis): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`, `hypothesis_formation`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
 
 ## Gates (replan)
 
-- When `reproduction_design` closes: `feasibility` of `feasible`/`proxy_only` → create `analysis`, `reproduction_audit`, `reproduce` under that branch; `data_unavailable`/`construct_mismatch` → create only `reproduce` (it records the law `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created.
+- When `experiment_design` closes (in a `replication` or `testing` branch): if `feasibility` is `feasible` or `proxy_only` → create the branch's remaining steps, in order: `analysis`, `audit`, `adjudicate` — and, in a `testing` branch only, a `data_acquisition` step ahead of `analysis` when the design names data not yet in hand. If `feasibility` is `data_unavailable` or `construct_mismatch` → create only `adjudicate` (it records `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created.
 - When `testability_triage` closes: create a `verification` branch only per theory in `testable_theory_ids`; the rest become `next_steps` in the final report.
+- When `hypothesis_formation` closes: create one `testing` branch per hypothesis.
 
 ## Bootstrap
 
 1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
-2. `bd create -t epic` the root from the mission, tagged `epic_root: true` + the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
-3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
-4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
+2. **Resolve the session config.** Start from the `config:` defaults in `assets/schemas.yaml`; apply any overrides from a `## Config` section in `mission.md` (one `key: value` line each; unknown keys are an error — surface them). The resolved map is pinned in the next step and never re-resolved mid-session.
+3. `bd create -t epic` the root from the mission, tagged with metadata `{"research_step": {"epic_root": true, "flow": "<flow>", "config": {<resolved config>}}}`. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+4. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+5. Report the epic id, the flow, the resolved config, the loop/group ids, and the frontier task ids.
 
 ## Replan
 
 When a step closes, create the next node(s) under their parent, in flow order:
 
-- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
-- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the group's steps under each via `create-task.sh` (record why for any branch the data can't support, rather than skipping it).
-- Apply the **Gates** rules above.
-- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
+- Create each step with `create-task.sh`. Its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling; the step's `input:` list in `schemas.yaml` names **which** upstream steps to wire.
+- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the branch steps under each via `create-task.sh` — **but a gated group lays only the steps up to its gate**: under a `replication` or `testing` branch create only `experiment_design`; the **Gates** rules above create the rest once that `experiment_design` closes. Ungated branches (`verification`: analysis, audit, adjudicate; `theory_generation`: theory_formation) get all their steps at branch creation. Record why for any branch the data can't support, rather than skipping it.
+- Apply the **Gates** rules above — they are the only creator of post-gate steps, so nothing is double-created.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `hypothesis_synthesis`, `discovery_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape.
 
 Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
diff --git a/plugins/asta/skills/research-step/workflows/update-summary.md b/plugins/asta/skills/research-step/workflows/update-summary.md
index 311c81a..a96a9fa 100644
--- a/plugins/asta/skills/research-step/workflows/update-summary.md
+++ b/plugins/asta/skills/research-step/workflows/update-summary.md
@@ -15,12 +15,11 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** Everything comes from `bd list --json`:
-   - the full tree (issue_count, status partition);
-   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
-   Project to `{id, task_type: .metadata.research_step.task_type, title}` and partition by `.status`.
+3. **Gather state inline.**
+   - `bd list --json --all --limit 0` for the full tree — `--all` because closed issues carry the results, `--limit 0` because bd truncates at 50 rows by default. Project to `{id, task_type: .metadata.research_step.task_type, title, status}` and partition by `.status`.
+   - `scripts/next-task.sh` for the **next task and the queue** (open task-type issues, numerically sorted by id — the same order `execute` uses). This replaces `bd ready`; there are no edges, so id order is the ordering signal.
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
-5. **Overwrite `summary.md`** using this template:
+5. **Overwrite `summary.md`** using this template (sections follow the schema's taxonomy — flows, laws, theories, reports — not any per-flow hardcoding; render what the closed tasks' `output_json` actually contains):
 
    ```markdown
    ---
@@ -28,7 +27,7 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    beads_epic: <bd-id>
    generated_at: <ISO-8601 UTC>
    issue_count: <n>
-   ready_count: <n>
+   open_task_count: <n>
    ---
 
    # <mission title>
@@ -36,36 +35,29 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Mission
    <verbatim mission.md, or one-paragraph summary if long>
 
-   ## Research Question & Scope
-   <from scope issue's output, or "pending" if not yet closed>
+   ## Flow
+   <one line per flow this session runs (from task metadata `flow`), with where it
+   stands — e.g. "reproduction — replication branches 2/5 closed, synthesis pending">
 
-   ## Operational Definitions
-   <from definitions issue's output>
+   ## Results so far
+   <one subsection per closed `*_synthesis` task: the report's `headline` plus a link
+   to its `report_path`. Before any synthesis has closed, instead give one bullet per
+   closed task: "<bd-id> [<task_type>]: <one-line outcome from output_json>" — e.g.
+   laws extracted, datasets acquired, theories formed, verdicts finalized.>
 
-   ## Related Work
-   <literature_review.output.key_findings as bullets; link to summary_path>
-
-   ## Hypotheses
-   <one subsection per hypothesis issue: "H_n: <statement>" plus current verdict from its analysis if closed>
-
-   ## Experimental Designs
-   <one subsection per experiment_design, grouped under its hypothesis>
-
-   ## Results Summary
-   <table: hypothesis | verdict | confidence | analysis-id>
-
-   ## Open Questions
-   <synthesis.output.open_questions if synthesis exists, else aggregated from in-flight notes>
+   ## Gaps
+   <the `gaps` rows from closed report outputs (item — missing_data — severity),
+   or "none recorded">
 
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
+   - Open tasks: <n> — next: <`next:` from next-task.sh>; queue: <`queue:` line>
 
    ### Next Steps
-   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
+   <the `next:` task followed by the rest of the queue from next-task.sh, in order, one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If there are no open task issues, write "No open tasks — flow complete.">
+   If next-task.sh prints `next: none`, write "No open tasks — flow complete.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)
diff --git a/skills/research-step/SKILL.md b/skills/research-step/SKILL.md
index 49a7fec..e9f9a8c 100644
--- a/skills/research-step/SKILL.md
+++ b/skills/research-step/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-step
 description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs.
-allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
+allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Bash(asta:*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*)
 ---
 
 # Research Step
 
-Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; typically a separate epic kicked off after a theory-generation run), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types`, the `tasks` (typed `input`/`output` + a common `artifacts`), and the `flows` (each step carrying its `mission` + asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
+Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `hypothesis_driven_research` flow (literature → falsifiable hypotheses → one prespecified test per hypothesis), the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; run it as its own session in a **separate workspace** — own `mission.md` and `.beads` — typically kicked off after a theory-generation run; a second epic root in the same workspace breaks `scripts/epic-root.sh`), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types` (immutable records — verdicts are `adjudication` records referencing their subject), the `tasks` (pure output contracts mapping each output key to its type), and the `flows` (each step carrying its `mission`, its `input` steps, and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`.
 
 This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first.
 
@@ -23,7 +23,7 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec
 | `mission.md` | Input. The research task. |
 | `.beads/` | Source of truth for state. |
 | `summary.md` | Derived view of the session, regenerated by **update-summary**. Beads is the source of truth; this file is just a digest for humans and for **brainstorm**. Frontmatter `beads_snapshot` records the state it was rendered from. |
-| `background_knowledge.txt` | Optional. Long-form context referenced from issue metadata via `summary_path`. |
+| `.asta/<agent>/<slug>/` | Heavy artifacts (raw agent JSON, datasets, reports), referenced from `output_json` by repo-root-relative `_path` fields. |
 
 ## Workflows
 
@@ -51,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working
 
 - **init** → always run **plan** afterwards (which then chains to **update-summary**).
 - **plan** → always run **update-summary** afterwards so the digest reflects the new graph.
-- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off table in `execute.md`); otherwise chain directly to **update-summary**.
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off rule in `execute.md`, last step); otherwise chain directly to **update-summary**.
 - **update-summary** and **brainstorm** → never chain.
 
 ## Boundaries
diff --git a/skills/research-step/assets/compiled/adjudicate.schema.json b/skills/research-step/assets/compiled/adjudicate.schema.json
new file mode 100644
index 0000000..ccfb9d1
--- /dev/null
+++ b/skills/research-step/assets/compiled/adjudicate.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/adjudicate.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "artifacts"
+  ],
+  "title": "adjudicate",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/analysis.schema.json b/skills/research-step/assets/compiled/analysis.schema.json
new file mode 100644
index 0000000..55e557d
--- /dev/null
+++ b/skills/research-step/assets/compiled/analysis.schema.json
@@ -0,0 +1,119 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "analysis": {
+      "additionalProperties": true,
+      "properties": {
+        "assumptions": {
+          "type": "string"
+        },
+        "code": {
+          "type": "string"
+        },
+        "final_answer": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "final_answer",
+        "assumptions",
+        "code"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/analysis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "analysis": {
+      "$ref": "#/$defs/analysis"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "analysis",
+    "figures",
+    "artifacts"
+  ],
+  "title": "analysis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/audit.schema.json b/skills/research-step/assets/compiled/audit.schema.json
new file mode 100644
index 0000000..ca21120
--- /dev/null
+++ b/skills/research-step/assets/compiled/audit.schema.json
@@ -0,0 +1,127 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "audit_report": {
+      "additionalProperties": true,
+      "properties": {
+        "artifacts_found": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "challenges": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "check": {
+                "type": "string"
+              },
+              "concern": {
+                "type": "string"
+              },
+              "outcome": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "concern",
+              "check",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "recommended_adjustment": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "verdict_survives": {
+          "type": "boolean"
+        }
+      },
+      "required": [
+        "subject_id",
+        "challenges",
+        "artifacts_found",
+        "verdict_survives",
+        "recommended_adjustment"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/audit.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "audit_report": {
+      "$ref": "#/$defs/audit_report"
+    }
+  },
+  "required": [
+    "audit_report",
+    "artifacts"
+  ],
+  "title": "audit",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/auto_discovery.mmd b/skills/research-step/assets/compiled/auto_discovery.mmd
new file mode 100644
index 0000000..14cd992
--- /dev/null
+++ b/skills/research-step/assets/compiled/auto_discovery.mmd
@@ -0,0 +1,18 @@
+%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
+  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
+  subgraph replication["replication (at replan)"]
+    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
+  end
+  class replication replan
+  discovery_synthesis["discovery_synthesis"]
+  cohort_assembly --> discovery_run
+  discovery_run --> replication__holdout_replication
+  cohort_assembly --> replication__holdout_replication
+  discovery_run --> discovery_synthesis
+  replication --> discovery_synthesis
diff --git a/skills/research-step/assets/compiled/cohort_assembly.schema.json b/skills/research-step/assets/compiled/cohort_assembly.schema.json
new file mode 100644
index 0000000..4866540
--- /dev/null
+++ b/skills/research-step/assets/compiled/cohort_assembly.schema.json
@@ -0,0 +1,206 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "cohort": {
+      "additionalProperties": true,
+      "properties": {
+        "discovery_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "exclusion_criteria": {
+          "type": "string"
+        },
+        "holdout_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "id": {
+          "type": "string"
+        },
+        "inclusion_criteria": {
+          "type": "string"
+        },
+        "research_question": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source_data_sources": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "research_question",
+        "inclusion_criteria",
+        "exclusion_criteria",
+        "sampling",
+        "source_data_sources",
+        "discovery_subset",
+        "holdout_subset",
+        "run_id"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/cohort_assembly.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "cohort": {
+      "$ref": "#/$defs/cohort"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "cohort",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "cohort_assembly",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/data_acquisition.schema.json b/skills/research-step/assets/compiled/data_acquisition.schema.json
new file mode 100644
index 0000000..0bec23c
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_acquisition.schema.json
@@ -0,0 +1,161 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "acquisition": {
+      "additionalProperties": true,
+      "properties": {
+        "access_status": {
+          "enum": [
+            "acquired",
+            "open_unfetched",
+            "restricted",
+            "not_found"
+          ]
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "dataset_id": {
+          "type": "string"
+        },
+        "local_path": {
+          "type": "string"
+        },
+        "validation_note": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "access_status",
+        "local_path",
+        "dataset_id",
+        "validation_note"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_acquisition.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "acquisitions": {
+      "items": {
+        "$ref": "#/$defs/acquisition"
+      },
+      "type": "array"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "acquisitions",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_acquisition",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
new file mode 100644
index 0000000..cb56eed
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
@@ -0,0 +1,92 @@
+%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  subgraph data_provenance["data_provenance [flow: data_provenance]"]
+    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    data_provenance__provenance_synthesis["provenance_synthesis"]
+  end
+  class data_provenance embed
+  subgraph reproduction["reproduction [flow: reproduction]"]
+    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+    reproduction__law_extraction["law_extraction"]
+    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+    subgraph reproduction__replication["replication (at replan)"]
+      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
+      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__adjudicate["adjudicate"]
+    end
+    class reproduction__replication replan
+    reproduction__reproduction_synthesis["reproduction_synthesis"]
+  end
+  class reproduction embed
+  subgraph theorizer["theorizer [flow: theorizer]"]
+    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    subgraph theorizer__theory_generation["theory_generation"]
+      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+    end
+    theorizer__testability_triage["testability_triage"]
+    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+    theorizer__theory_synthesis["theory_synthesis"]
+  end
+  class theorizer embed
+  subgraph verification["verification (at replan)"]
+    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__adjudicate["adjudicate"]
+  end
+  class verification replan
+  verification_synthesis["verification_synthesis"]
+  gap_synthesis["gap_synthesis"]
+  final_synthesis["final_synthesis"]
+  data_provenance__provenance_search --> data_provenance__provenance_extraction
+  data_provenance__provenance_search --> data_provenance__data_acquisition
+  data_provenance__provenance_extraction --> data_provenance__data_acquisition
+  data_provenance__provenance_search --> data_provenance__provenance_synthesis
+  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
+  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
+  reproduction__data_driven_discovery --> reproduction__law_extraction
+  reproduction__law_extraction --> reproduction__evidence_gathering
+  reproduction__law_extraction --> reproduction__replication__experiment_design
+  reproduction__evidence_gathering --> reproduction__replication__experiment_design
+  reproduction__replication__experiment_design --> reproduction__replication__analysis
+  reproduction__evidence_gathering --> reproduction__replication__analysis
+  reproduction__replication__analysis --> reproduction__replication__audit
+  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
+  reproduction__replication__analysis --> reproduction__replication__adjudicate
+  reproduction__replication__audit --> reproduction__replication__adjudicate
+  reproduction__law_extraction --> reproduction__reproduction_synthesis
+  reproduction__replication --> reproduction__reproduction_synthesis
+  reproduction__law_extraction --> theorizer__evidence_extraction
+  reproduction__replication__adjudicate --> theorizer__evidence_extraction
+  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
+  theorizer__theory_generation --> theorizer__testability_triage
+  reproduction__data_driven_discovery --> theorizer__testability_triage
+  reproduction__evidence_gathering --> theorizer__testability_triage
+  theorizer__testability_triage --> theorizer__novelty_assessment
+  theorizer__theory_generation --> theorizer__theory_synthesis
+  theorizer__novelty_assessment --> theorizer__theory_synthesis
+  theorizer__testability_triage --> theorizer__theory_synthesis
+  theorizer__testability_triage --> verification__analysis
+  reproduction__data_driven_discovery --> verification__analysis
+  reproduction__evidence_gathering --> verification__analysis
+  verification__analysis --> verification__audit
+  theorizer__testability_triage --> verification__adjudicate
+  verification__analysis --> verification__adjudicate
+  verification__audit --> verification__adjudicate
+  verification --> verification_synthesis
+  theorizer__novelty_assessment --> verification_synthesis
+  data_provenance__provenance_synthesis --> gap_synthesis
+  reproduction__reproduction_synthesis --> gap_synthesis
+  theorizer__theory_synthesis --> gap_synthesis
+  verification_synthesis --> gap_synthesis
+  data_provenance__provenance_synthesis --> final_synthesis
+  reproduction__reproduction_synthesis --> final_synthesis
+  theorizer__theory_synthesis --> final_synthesis
+  verification_synthesis --> final_synthesis
+  gap_synthesis --> final_synthesis
diff --git a/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/skills/research-step/assets/compiled/data_driven_discovery.schema.json
new file mode 100644
index 0000000..14f65a7
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_driven_discovery.schema.json
@@ -0,0 +1,152 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_driven_discovery.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_driven_discovery",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/data_provenance.mmd b/skills/research-step/assets/compiled/data_provenance.mmd
new file mode 100644
index 0000000..3b46977
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_provenance.mmd
@@ -0,0 +1,16 @@
+%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+  provenance_synthesis["provenance_synthesis"]
+  provenance_search --> provenance_extraction
+  provenance_search --> data_acquisition
+  provenance_extraction --> data_acquisition
+  provenance_search --> provenance_synthesis
+  provenance_extraction --> provenance_synthesis
+  data_acquisition --> provenance_synthesis
diff --git a/skills/research-step/assets/compiled/discovery_run.schema.json b/skills/research-step/assets/compiled/discovery_run.schema.json
new file mode 100644
index 0000000..b7ac259
--- /dev/null
+++ b/skills/research-step/assets/compiled/discovery_run.schema.json
@@ -0,0 +1,170 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_run.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "discovery_run",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/skills/research-step/assets/compiled/discovery_synthesis.schema.json
new file mode 100644
index 0000000..29cb31f
--- /dev/null
+++ b/skills/research-step/assets/compiled/discovery_synthesis.schema.json
@@ -0,0 +1,271 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "discovery_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "interpretation": {
+          "type": "string"
+        },
+        "laws": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "deciding_experiment": {
+                "type": "string"
+              },
+              "effect_size_discovery": {
+                "type": "string"
+              },
+              "effect_size_holdout": {
+                "type": "string"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "surprise": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "surprise",
+              "outcome",
+              "deciding_experiment",
+              "effect_size_discovery",
+              "effect_size_holdout"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "run_id",
+        "laws",
+        "interpretation",
+        "next_steps",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "discovery_report": {
+      "$ref": "#/$defs/discovery_report"
+    }
+  },
+  "required": [
+    "discovery_report",
+    "artifacts"
+  ],
+  "title": "discovery_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/evidence_extraction.schema.json b/skills/research-step/assets/compiled/evidence_extraction.schema.json
new file mode 100644
index 0000000..7a53a5b
--- /dev/null
+++ b/skills/research-step/assets/compiled/evidence_extraction.schema.json
@@ -0,0 +1,132 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "artifacts"
+  ],
+  "title": "evidence_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/evidence_gathering.schema.json b/skills/research-step/assets/compiled/evidence_gathering.schema.json
new file mode 100644
index 0000000..c310796
--- /dev/null
+++ b/skills/research-step/assets/compiled/evidence_gathering.schema.json
@@ -0,0 +1,121 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_gathering.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "datasets",
+    "artifacts"
+  ],
+  "title": "evidence_gathering",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/experiment_design.schema.json b/skills/research-step/assets/compiled/experiment_design.schema.json
new file mode 100644
index 0000000..458fe42
--- /dev/null
+++ b/skills/research-step/assets/compiled/experiment_design.schema.json
@@ -0,0 +1,162 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "experiment_design": {
+      "additionalProperties": true,
+      "properties": {
+        "construct_equivalence": {
+          "enum": [
+            "equivalent",
+            "proxy",
+            "mismatch"
+          ]
+        },
+        "data_gap": {
+          "type": "string"
+        },
+        "experiment_design_query": {
+          "type": "string"
+        },
+        "experiment_name": {
+          "type": "string"
+        },
+        "feasibility": {
+          "enum": [
+            "feasible",
+            "proxy_only",
+            "data_unavailable",
+            "construct_mismatch"
+          ]
+        },
+        "independent_operationalization": {
+          "type": "string"
+        },
+        "plain_language_description": {
+          "type": "string"
+        },
+        "prespecified": {
+          "additionalProperties": true,
+          "properties": {
+            "metric": {
+              "type": "string"
+            },
+            "success_threshold": {
+              "type": "string"
+            },
+            "test": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "test",
+            "metric",
+            "success_threshold"
+          ],
+          "type": "object"
+        },
+        "required_data": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "experiment_name",
+        "plain_language_description",
+        "source_operationalization",
+        "independent_operationalization",
+        "construct_equivalence",
+        "feasibility",
+        "required_data",
+        "data_gap",
+        "experiment_design_query",
+        "prespecified"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/experiment_design.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "experiment_design": {
+      "$ref": "#/$defs/experiment_design"
+    }
+  },
+  "required": [
+    "experiment_design",
+    "artifacts"
+  ],
+  "title": "experiment_design",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/final_synthesis.schema.json b/skills/research-step/assets/compiled/final_synthesis.schema.json
new file mode 100644
index 0000000..b00f085
--- /dev/null
+++ b/skills/research-step/assets/compiled/final_synthesis.schema.json
@@ -0,0 +1,289 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "research_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "inference_chain": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "chain": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "claim": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "claim",
+              "chain"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sub_reports": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "report_path",
+              "one_line"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "tensions_and_surprises": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "evidence": {
+                "type": "string"
+              },
+              "observation": {
+                "type": "string"
+              },
+              "where": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "observation",
+              "where",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_highlights": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "claim": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_was_done": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theory_highlights",
+        "inference_chain",
+        "what_was_done",
+        "sub_reports",
+        "tensions_and_surprises",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/final_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "research_report": {
+      "$ref": "#/$defs/research_report"
+    }
+  },
+  "required": [
+    "research_report",
+    "artifacts"
+  ],
+  "title": "final_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/flows.json b/skills/research-step/assets/compiled/flows.json
new file mode 100644
index 0000000..907a432
--- /dev/null
+++ b/skills/research-step/assets/compiled/flows.json
@@ -0,0 +1,6657 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "flows": {
+    "auto_discovery": {
+      "edges": [
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "discovery_run"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "discovery_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "discovery_synthesis"
+        }
+      ],
+      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta documents",
+            "asta generate-theories find-and-extract",
+            "asta autodiscovery create",
+            "asta autodiscovery upload",
+            "asta autodiscovery metadata"
+          ],
+          "id": "cohort_assembly",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
+          "name": "cohort_assembly",
+          "parent": null,
+          "replan": false,
+          "task": "cohort_assembly"
+        },
+        {
+          "chain": [
+            "asta autodiscovery submit",
+            "asta autodiscovery experiments"
+          ],
+          "id": "discovery_run",
+          "inputs": [
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
+          "name": "discovery_run",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_run"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__holdout_replication",
+          "inputs": [
+            "discovery_run",
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "holdout_replication",
+          "parent": "replication",
+          "replan": false,
+          "task": "holdout_replication"
+        },
+        {
+          "chain": [],
+          "id": "discovery_synthesis",
+          "inputs": [
+            "discovery_run",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
+          "name": "discovery_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_synthesis"
+        }
+      ]
+    },
+    "data_and_literature_grounded_theory_generation": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_provenance__data_acquisition",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "reproduction__law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "reproduction__replication__audit",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "reproduction__replication",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "adjudicate",
+          "source": "reproduction__replication__adjudicate",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "theorizer__evidence_extraction",
+          "target": "theorizer__theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__audit"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "verification__audit",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "verification",
+          "source": "verification",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "gap_synthesis",
+          "source": "gap_synthesis",
+          "target": "final_synthesis"
+        }
+      ],
+      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
+      "nodes": [
+        {
+          "id": "data_provenance",
+          "kind": "embed",
+          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
+          "name": "data_provenance",
+          "parent": null,
+          "replan": false,
+          "workflow": "data_provenance"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "data_provenance__provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "data_provenance__provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_provenance__data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "data_provenance__provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_synthesis"
+        },
+        {
+          "id": "reproduction",
+          "kind": "embed",
+          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
+          "name": "reproduction",
+          "parent": null,
+          "replan": false,
+          "workflow": "reproduction"
+        },
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "reproduction__data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "reproduction__evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "reproduction__replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": "reproduction",
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "reproduction__replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "reproduction_synthesis"
+        },
+        {
+          "id": "theorizer",
+          "kind": "embed",
+          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
+          "name": "theorizer",
+          "parent": null,
+          "replan": false,
+          "workflow": "theorizer"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "theorizer__evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theorizer__theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": "theorizer",
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theorizer__theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theorizer__theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "theorizer__novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "verification",
+          "kind": "group",
+          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
+          "name": "verification",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__analysis",
+          "inputs": [
+            "testability_triage",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "verification",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "verification",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "verification__adjudicate",
+          "inputs": [
+            "testability_triage",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
+          "name": "adjudicate",
+          "parent": "verification",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "verification_synthesis",
+          "inputs": [
+            "verification",
+            "novelty_assessment"
+          ],
+          "kind": "step",
+          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether it survived the audit), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
+          "name": "verification_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "verification_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "gap_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
+          "name": "gap_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "gap_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "final_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis",
+            "gap_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
+          "name": "final_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "final_synthesis"
+        }
+      ]
+    },
+    "data_provenance": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_acquisition",
+          "target": "provenance_synthesis"
+        }
+      ],
+      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": null,
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_synthesis"
+        }
+      ]
+    },
+    "hypothesis_driven_research": {
+      "edges": [
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "hypothesis_formation"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "testing__data_acquisition",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "testing__audit",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "hypothesis_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testing",
+          "source": "testing",
+          "target": "hypothesis_synthesis"
+        }
+      ],
+      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "literature_review",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
+          "name": "literature_review",
+          "parent": null,
+          "replan": false,
+          "task": "literature_review"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "hypothesis_formation",
+          "inputs": [
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
+          "name": "hypothesis_formation",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_formation"
+        },
+        {
+          "id": "testing",
+          "kind": "group",
+          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
+          "name": "testing",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "testing__experiment_design",
+          "inputs": [
+            "hypothesis_formation",
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "testing",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "testing__data_acquisition",
+          "inputs": [
+            "experiment_design"
+          ],
+          "kind": "step",
+          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
+          "name": "data_acquisition",
+          "parent": "testing",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__analysis",
+          "inputs": [
+            "experiment_design",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "testing",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "testing",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "testing__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
+          "name": "adjudicate",
+          "parent": "testing",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "hypothesis_synthesis",
+          "inputs": [
+            "hypothesis_formation",
+            "testing"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
+          "name": "hypothesis_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_synthesis"
+        }
+      ]
+    },
+    "reproduction": {
+      "edges": [
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "data_driven_discovery",
+          "target": "law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "replication__audit",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "reproduction_synthesis"
+        }
+      ],
+      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.",
+      "nodes": [
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "reproduction_synthesis"
+        }
+      ]
+    },
+    "theorizer": {
+      "edges": [
+        {
+          "external": true,
+          "input": "law_extraction",
+          "source": "ext__law_extraction",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": true,
+          "input": "adjudicate",
+          "source": "ext__adjudicate",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "evidence_extraction",
+          "target": "theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "data_driven_discovery",
+          "source": "ext__data_driven_discovery",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "evidence_gathering",
+          "source": "ext__evidence_gathering",
+          "target": "testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "novelty_assessment",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "theory_synthesis"
+        }
+      ],
+      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data in hand can actually test.",
+      "nodes": [
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": null,
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": null,
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": null,
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "ext__adjudicate",
+          "kind": "external",
+          "mission": "",
+          "name": "adjudicate",
+          "parent": null,
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "id": "ext__data_driven_discovery",
+          "kind": "external",
+          "mission": "",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "id": "ext__evidence_gathering",
+          "kind": "external",
+          "mission": "",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "ext__law_extraction",
+          "kind": "external",
+          "mission": "",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        }
+      ]
+    }
+  },
+  "format_version": 1,
+  "schema_version": 2,
+  "tasks": {
+    "adjudicate": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/adjudicate.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "artifacts"
+        ],
+        "title": "adjudicate",
+        "type": "object"
+      }
+    },
+    "analysis": {
+      "output": {
+        "analysis": "analysis",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "analysis": {
+            "additionalProperties": true,
+            "properties": {
+              "assumptions": {
+                "type": "string"
+              },
+              "code": {
+                "type": "string"
+              },
+              "final_answer": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "final_answer",
+              "assumptions",
+              "code"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/analysis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "analysis": {
+            "$ref": "#/$defs/analysis"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "analysis",
+          "figures",
+          "artifacts"
+        ],
+        "title": "analysis",
+        "type": "object"
+      }
+    },
+    "audit": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "audit_report": "audit_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "audit_report": {
+            "additionalProperties": true,
+            "properties": {
+              "artifacts_found": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "challenges": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "check": {
+                      "type": "string"
+                    },
+                    "concern": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "concern",
+                    "check",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "recommended_adjustment": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "verdict_survives": {
+                "type": "boolean"
+              }
+            },
+            "required": [
+              "subject_id",
+              "challenges",
+              "artifacts_found",
+              "verdict_survives",
+              "recommended_adjustment"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/audit.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "audit_report": {
+            "$ref": "#/$defs/audit_report"
+          }
+        },
+        "required": [
+          "audit_report",
+          "artifacts"
+        ],
+        "title": "audit",
+        "type": "object"
+      }
+    },
+    "cohort_assembly": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "cohort": "cohort",
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "cohort": {
+            "additionalProperties": true,
+            "properties": {
+              "discovery_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "exclusion_criteria": {
+                "type": "string"
+              },
+              "holdout_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "id": {
+                "type": "string"
+              },
+              "inclusion_criteria": {
+                "type": "string"
+              },
+              "research_question": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source_data_sources": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "research_question",
+              "inclusion_criteria",
+              "exclusion_criteria",
+              "sampling",
+              "source_data_sources",
+              "discovery_subset",
+              "holdout_subset",
+              "run_id"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/cohort_assembly.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "cohort": {
+            "$ref": "#/$defs/cohort"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "cohort",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "cohort_assembly",
+        "type": "object"
+      }
+    },
+    "data_acquisition": {
+      "output": {
+        "acquisitions": [
+          "acquisition"
+        ],
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "acquisition": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "validation_note": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "access_status",
+              "local_path",
+              "dataset_id",
+              "validation_note"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_acquisition.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "acquisitions": {
+            "items": {
+              "$ref": "#/$defs/acquisition"
+            },
+            "type": "array"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "acquisitions",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_acquisition",
+        "type": "object"
+      }
+    },
+    "data_driven_discovery": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_driven_discovery.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_driven_discovery",
+        "type": "object"
+      }
+    },
+    "discovery_run": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_run.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "discovery_run",
+        "type": "object"
+      }
+    },
+    "discovery_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "discovery_report": "discovery_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "discovery_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "interpretation": {
+                "type": "string"
+              },
+              "laws": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "deciding_experiment": {
+                      "type": "string"
+                    },
+                    "effect_size_discovery": {
+                      "type": "string"
+                    },
+                    "effect_size_holdout": {
+                      "type": "string"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "surprise": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "surprise",
+                    "outcome",
+                    "deciding_experiment",
+                    "effect_size_discovery",
+                    "effect_size_holdout"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "run_id",
+              "laws",
+              "interpretation",
+              "next_steps",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "discovery_report": {
+            "$ref": "#/$defs/discovery_report"
+          }
+        },
+        "required": [
+          "discovery_report",
+          "artifacts"
+        ],
+        "title": "discovery_synthesis",
+        "type": "object"
+      }
+    },
+    "evidence_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "artifacts"
+        ],
+        "title": "evidence_extraction",
+        "type": "object"
+      }
+    },
+    "evidence_gathering": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_gathering.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "datasets",
+          "artifacts"
+        ],
+        "title": "evidence_gathering",
+        "type": "object"
+      }
+    },
+    "experiment_design": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "experiment_design": "experiment_design"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "experiment_design": {
+            "additionalProperties": true,
+            "properties": {
+              "construct_equivalence": {
+                "enum": [
+                  "equivalent",
+                  "proxy",
+                  "mismatch"
+                ]
+              },
+              "data_gap": {
+                "type": "string"
+              },
+              "experiment_design_query": {
+                "type": "string"
+              },
+              "experiment_name": {
+                "type": "string"
+              },
+              "feasibility": {
+                "enum": [
+                  "feasible",
+                  "proxy_only",
+                  "data_unavailable",
+                  "construct_mismatch"
+                ]
+              },
+              "independent_operationalization": {
+                "type": "string"
+              },
+              "plain_language_description": {
+                "type": "string"
+              },
+              "prespecified": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "experiment_name",
+              "plain_language_description",
+              "source_operationalization",
+              "independent_operationalization",
+              "construct_equivalence",
+              "feasibility",
+              "required_data",
+              "data_gap",
+              "experiment_design_query",
+              "prespecified"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/experiment_design.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "experiment_design": {
+            "$ref": "#/$defs/experiment_design"
+          }
+        },
+        "required": [
+          "experiment_design",
+          "artifacts"
+        ],
+        "title": "experiment_design",
+        "type": "object"
+      }
+    },
+    "final_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "research_report": "research_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "research_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "inference_chain": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "chain": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "claim": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "claim",
+                    "chain"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sub_reports": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "kind": {
+                      "type": "string"
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "report_path": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "kind",
+                    "report_path",
+                    "one_line"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "tensions_and_surprises": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "observation": {
+                      "type": "string"
+                    },
+                    "where": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "observation",
+                    "where",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_highlights": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "claim": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_was_done": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theory_highlights",
+              "inference_chain",
+              "what_was_done",
+              "sub_reports",
+              "tensions_and_surprises",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/final_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "research_report": {
+            "$ref": "#/$defs/research_report"
+          }
+        },
+        "required": [
+          "research_report",
+          "artifacts"
+        ],
+        "title": "final_synthesis",
+        "type": "object"
+      }
+    },
+    "gap_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_gaps_report": "data_gaps_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_gaps_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "arose_in": {
+                      "type": "string"
+                    },
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity",
+                    "arose_in"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "gaps",
+              "next_steps",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/gap_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_gaps_report": {
+            "$ref": "#/$defs/data_gaps_report"
+          }
+        },
+        "required": [
+          "data_gaps_report",
+          "artifacts"
+        ],
+        "title": "gap_synthesis",
+        "type": "object"
+      }
+    },
+    "holdout_replication": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/holdout_replication.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "figures",
+          "artifacts"
+        ],
+        "title": "holdout_replication",
+        "type": "object"
+      }
+    },
+    "hypothesis_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypotheses": [
+          "hypothesis"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "hypothesis": {
+            "additionalProperties": true,
+            "properties": {
+              "falsifiable_prediction": {
+                "type": "string"
+              },
+              "grounds": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "rationale": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "rationale",
+              "falsifiable_prediction",
+              "grounds"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypotheses": {
+            "items": {
+              "$ref": "#/$defs/hypothesis"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "hypotheses",
+          "artifacts"
+        ],
+        "title": "hypothesis_formation",
+        "type": "object"
+      }
+    },
+    "hypothesis_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypothesis_report": "hypothesis_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "hypothesis_report": {
+            "additionalProperties": true,
+            "properties": {
+              "answer": {
+                "type": "string"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "hypothesis_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "hypothesis_id",
+                    "statement",
+                    "outcome",
+                    "effect_size_observed",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_questions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "question": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "question",
+              "ledger",
+              "answer",
+              "open_questions",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypothesis_report": {
+            "$ref": "#/$defs/hypothesis_report"
+          }
+        },
+        "required": [
+          "hypothesis_report",
+          "artifacts"
+        ],
+        "title": "hypothesis_synthesis",
+        "type": "object"
+      }
+    },
+    "law_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/law_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "law_extraction",
+        "type": "object"
+      }
+    },
+    "literature_review": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "literature_review": "literature_review"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "literature_review": {
+            "additionalProperties": true,
+            "properties": {
+              "citations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "corpus_id": {
+                      "type": "number"
+                    },
+                    "id": {
+                      "type": "string"
+                    },
+                    "relevance": {
+                      "type": "string"
+                    },
+                    "title": {
+                      "type": "string"
+                    },
+                    "url": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "id",
+                    "corpus_id",
+                    "title",
+                    "url",
+                    "relevance"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "key_findings": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_gaps": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "summary": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "summary",
+              "key_findings",
+              "open_gaps",
+              "citations"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/literature_review.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "literature_review": {
+            "$ref": "#/$defs/literature_review"
+          }
+        },
+        "required": [
+          "literature_review",
+          "artifacts"
+        ],
+        "title": "literature_review",
+        "type": "object"
+      }
+    },
+    "novelty_assessment": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_evaluations": [
+          "theory_evaluation"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_evaluation": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "overall_support": {
+                "enum": [
+                  "supports",
+                  "mixed",
+                  "contradicts",
+                  "inconclusive"
+                ]
+              },
+              "overall_support_raw": {
+                "type": "string"
+              },
+              "statement_evaluations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "explanation": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "statement_index": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "statement_index",
+                    "novelty",
+                    "explanation"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "theory_id",
+              "novelty",
+              "overall_support",
+              "explanation",
+              "statement_evaluations"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/novelty_assessment.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_evaluations": {
+            "items": {
+              "$ref": "#/$defs/theory_evaluation"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theory_evaluations",
+          "artifacts"
+        ],
+        "title": "novelty_assessment",
+        "type": "object"
+      }
+    },
+    "provenance_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data",
+        "source_access": [
+          "source_access"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "source_access": {
+            "additionalProperties": true,
+            "properties": {
+              "data_availability": {
+                "type": "string"
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "identifier": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "data_availability",
+              "repository",
+              "identifier"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          },
+          "source_access": {
+            "items": {
+              "$ref": "#/$defs/source_access"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "source_access",
+          "artifacts"
+        ],
+        "title": "provenance_extraction",
+        "type": "object"
+      }
+    },
+    "provenance_search": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_sources": [
+          "data_source"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_source": {
+            "additionalProperties": true,
+            "properties": {
+              "dataset_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "dataset_id",
+              "paper_id",
+              "paper_title",
+              "paper_url"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_search.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_sources": {
+            "items": {
+              "$ref": "#/$defs/data_source"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "data_sources",
+          "artifacts"
+        ],
+        "title": "provenance_search",
+        "type": "object"
+      }
+    },
+    "provenance_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "provenance_report": "provenance_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "provenance_report": {
+            "additionalProperties": true,
+            "properties": {
+              "acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "not_acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sources": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "access_status": {
+                      "enum": [
+                        "acquired",
+                        "open_unfetched",
+                        "restricted",
+                        "not_found"
+                      ]
+                    },
+                    "dataset_id": {
+                      "type": "string"
+                    },
+                    "local_path": {
+                      "type": "string"
+                    },
+                    "paper_title": {
+                      "type": "string"
+                    },
+                    "paper_url": {
+                      "type": "string"
+                    },
+                    "repository": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "dataset_id",
+                    "paper_title",
+                    "paper_url",
+                    "repository",
+                    "access_status",
+                    "local_path"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "sources",
+              "method_note",
+              "acquired",
+              "not_acquired",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "provenance_report": {
+            "$ref": "#/$defs/provenance_report"
+          }
+        },
+        "required": [
+          "provenance_report",
+          "artifacts"
+        ],
+        "title": "provenance_synthesis",
+        "type": "object"
+      }
+    },
+    "reproduction_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "reproduction_report": "reproduction_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "reproduction_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "laws_ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "effect_size_source": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "independence_axes": {
+                      "items": {
+                        "enum": [
+                          "region",
+                          "instrument",
+                          "method",
+                          "construct",
+                          "temporal",
+                          "population"
+                        ]
+                      },
+                      "type": "array"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "testability": {
+                      "enum": [
+                        "tested",
+                        "proxy_only",
+                        "untestable"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "outcome",
+                    "testability",
+                    "effect_size_source",
+                    "effect_size_observed",
+                    "independence_axes",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_failed_or_untestable": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_held": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "method_note",
+              "laws_ledger",
+              "what_held",
+              "what_failed_or_untestable",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/reproduction_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "reproduction_report": {
+            "$ref": "#/$defs/reproduction_report"
+          }
+        },
+        "required": [
+          "reproduction_report",
+          "artifacts"
+        ],
+        "title": "reproduction_synthesis",
+        "type": "object"
+      }
+    },
+    "testability_triage": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "testability_triage": "testability_triage"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "testability_triage": {
+            "additionalProperties": true,
+            "properties": {
+              "assessments": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "available_data": {
+                      "type": "string"
+                    },
+                    "gap": {
+                      "type": "string"
+                    },
+                    "proposed_test": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "metric": {
+                          "type": "string"
+                        },
+                        "success_threshold": {
+                          "type": "string"
+                        },
+                        "test": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "test",
+                        "metric",
+                        "success_threshold"
+                      ],
+                      "type": "object"
+                    },
+                    "required_data": {
+                      "type": "string"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "testable_now",
+                    "available_data",
+                    "required_data",
+                    "proposed_test",
+                    "gap"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "testable_theory_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "assessments",
+              "testable_theory_ids"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/testability_triage.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "testability_triage": {
+            "$ref": "#/$defs/testability_triage"
+          }
+        },
+        "required": [
+          "testability_triage",
+          "artifacts"
+        ],
+        "title": "testability_triage",
+        "type": "object"
+      }
+    },
+    "theory_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theories": [
+          "theory"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory": {
+            "additionalProperties": true,
+            "properties": {
+              "components": {
+                "additionalProperties": true,
+                "properties": {
+                  "generation_objective": {
+                    "type": "string"
+                  },
+                  "new_predictions_likely": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "new_predictions_unknown": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statements": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "conflicting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "statement_name": {
+                          "type": "string"
+                        },
+                        "supporting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "theory_statement": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "statement_name",
+                        "theory_statement",
+                        "supporting_evidence",
+                        "conflicting_evidence"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "unaccounted_for": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "generation_objective",
+                  "theory_statements",
+                  "new_predictions_likely",
+                  "new_predictions_unknown",
+                  "unaccounted_for"
+                ],
+                "type": "object"
+              },
+              "description": {
+                "type": "string"
+              },
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "name": {
+                "type": "string"
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "theory_query": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "name",
+              "description",
+              "theory_query",
+              "objective",
+              "grounds_law_ids",
+              "supporting_evidence_ids",
+              "components"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theories": {
+            "items": {
+              "$ref": "#/$defs/theory"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theories",
+          "artifacts"
+        ],
+        "title": "theory_formation",
+        "type": "object"
+      }
+    },
+    "theory_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_report": "theory_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "new_predictions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "novelty_summary": {
+                "type": "string"
+              },
+              "open_threads": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "theories": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "grounds_law_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "name": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "objective": {
+                      "enum": [
+                        "accuracy_focused",
+                        "novelty_focused"
+                      ]
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "supporting_evidence_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "name",
+                    "objective",
+                    "one_line",
+                    "grounds_law_ids",
+                    "novelty",
+                    "testable_now",
+                    "supporting_evidence_ids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theories",
+              "novelty_summary",
+              "new_predictions",
+              "open_threads",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_report": {
+            "$ref": "#/$defs/theory_report"
+          }
+        },
+        "required": [
+          "theory_report",
+          "artifacts"
+        ],
+        "title": "theory_synthesis",
+        "type": "object"
+      }
+    },
+    "verification_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "verification_report": "verification_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "verification_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "novelty_by_verification": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "audit_survived": {
+                      "type": "boolean"
+                    },
+                    "claim": {
+                      "type": "string"
+                    },
+                    "data_used": {
+                      "type": "string"
+                    },
+                    "effect_size": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome",
+                    "effect_size",
+                    "data_used",
+                    "audit_survived"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_could_not_be_tested": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_was_tested": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "novelty_by_verification",
+              "what_was_tested",
+              "what_could_not_be_tested",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/verification_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "verification_report": {
+            "$ref": "#/$defs/verification_report"
+          }
+        },
+        "required": [
+          "verification_report",
+          "artifacts"
+        ],
+        "title": "verification_synthesis",
+        "type": "object"
+      }
+    }
+  }
+}
diff --git a/skills/research-step/assets/compiled/gap_synthesis.schema.json b/skills/research-step/assets/compiled/gap_synthesis.schema.json
new file mode 100644
index 0000000..760fbb5
--- /dev/null
+++ b/skills/research-step/assets/compiled/gap_synthesis.schema.json
@@ -0,0 +1,221 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_gaps_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "arose_in": {
+                "type": "string"
+              },
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity",
+              "arose_in"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "gaps",
+        "next_steps",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/gap_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_gaps_report": {
+      "$ref": "#/$defs/data_gaps_report"
+    }
+  },
+  "required": [
+    "data_gaps_report",
+    "artifacts"
+  ],
+  "title": "gap_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/holdout_replication.schema.json b/skills/research-step/assets/compiled/holdout_replication.schema.json
new file mode 100644
index 0000000..9d18252
--- /dev/null
+++ b/skills/research-step/assets/compiled/holdout_replication.schema.json
@@ -0,0 +1,167 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/holdout_replication.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "figures",
+    "artifacts"
+  ],
+  "title": "holdout_replication",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
new file mode 100644
index 0000000..e996ef7
--- /dev/null
+++ b/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
@@ -0,0 +1,29 @@
+%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  literature_review["literature_review<br/>asta literature find · asta papers search"]
+  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph testing["testing (at replan)"]
+    testing__experiment_design["experiment_design<br/>asta experiment"]
+    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    testing__adjudicate["adjudicate"]
+  end
+  class testing replan
+  hypothesis_synthesis["hypothesis_synthesis"]
+  literature_review --> hypothesis_formation
+  hypothesis_formation --> testing__experiment_design
+  literature_review --> testing__experiment_design
+  testing__experiment_design --> testing__data_acquisition
+  testing__experiment_design --> testing__analysis
+  testing__data_acquisition --> testing__analysis
+  testing__analysis --> testing__audit
+  testing__experiment_design --> testing__adjudicate
+  testing__analysis --> testing__adjudicate
+  testing__audit --> testing__adjudicate
+  hypothesis_formation --> hypothesis_synthesis
+  testing --> hypothesis_synthesis
diff --git a/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/skills/research-step/assets/compiled/hypothesis_formation.schema.json
new file mode 100644
index 0000000..694d94f
--- /dev/null
+++ b/skills/research-step/assets/compiled/hypothesis_formation.schema.json
@@ -0,0 +1,126 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "hypothesis": {
+      "additionalProperties": true,
+      "properties": {
+        "falsifiable_prediction": {
+          "type": "string"
+        },
+        "grounds": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "rationale": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "rationale",
+        "falsifiable_prediction",
+        "grounds"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypotheses": {
+      "items": {
+        "$ref": "#/$defs/hypothesis"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "hypotheses",
+    "artifacts"
+  ],
+  "title": "hypothesis_formation",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
new file mode 100644
index 0000000..b2fe767
--- /dev/null
+++ b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
@@ -0,0 +1,224 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "hypothesis_report": {
+      "additionalProperties": true,
+      "properties": {
+        "answer": {
+          "type": "string"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "hypothesis_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "hypothesis_id",
+              "statement",
+              "outcome",
+              "effect_size_observed",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_questions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "question": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "question",
+        "ledger",
+        "answer",
+        "open_questions",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypothesis_report": {
+      "$ref": "#/$defs/hypothesis_report"
+    }
+  },
+  "required": [
+    "hypothesis_report",
+    "artifacts"
+  ],
+  "title": "hypothesis_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/law_extraction.schema.json b/skills/research-step/assets/compiled/law_extraction.schema.json
new file mode 100644
index 0000000..7b3e1fc
--- /dev/null
+++ b/skills/research-step/assets/compiled/law_extraction.schema.json
@@ -0,0 +1,139 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/law_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "law_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/literature_review.schema.json b/skills/research-step/assets/compiled/literature_review.schema.json
new file mode 100644
index 0000000..14df7b7
--- /dev/null
+++ b/skills/research-step/assets/compiled/literature_review.schema.json
@@ -0,0 +1,150 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "literature_review": {
+      "additionalProperties": true,
+      "properties": {
+        "citations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "corpus_id": {
+                "type": "number"
+              },
+              "id": {
+                "type": "string"
+              },
+              "relevance": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "corpus_id",
+              "title",
+              "url",
+              "relevance"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "key_findings": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_gaps": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "summary": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "summary",
+        "key_findings",
+        "open_gaps",
+        "citations"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/literature_review.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "literature_review": {
+      "$ref": "#/$defs/literature_review"
+    }
+  },
+  "required": [
+    "literature_review",
+    "artifacts"
+  ],
+  "title": "literature_review",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/novelty_assessment.schema.json b/skills/research-step/assets/compiled/novelty_assessment.schema.json
new file mode 100644
index 0000000..729f9fe
--- /dev/null
+++ b/skills/research-step/assets/compiled/novelty_assessment.schema.json
@@ -0,0 +1,147 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_evaluation": {
+      "additionalProperties": true,
+      "properties": {
+        "explanation": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "novelty": {
+          "enum": [
+            "established",
+            "derivable",
+            "genuinely_new"
+          ]
+        },
+        "overall_support": {
+          "enum": [
+            "supports",
+            "mixed",
+            "contradicts",
+            "inconclusive"
+          ]
+        },
+        "overall_support_raw": {
+          "type": "string"
+        },
+        "statement_evaluations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "statement_index": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "statement_index",
+              "novelty",
+              "explanation"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "theory_id",
+        "novelty",
+        "overall_support",
+        "explanation",
+        "statement_evaluations"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/novelty_assessment.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_evaluations": {
+      "items": {
+        "$ref": "#/$defs/theory_evaluation"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theory_evaluations",
+    "artifacts"
+  ],
+  "title": "novelty_assessment",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_extraction.schema.json b/skills/research-step/assets/compiled/provenance_extraction.schema.json
new file mode 100644
index 0000000..2bd4ea8
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_extraction.schema.json
@@ -0,0 +1,163 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "source_access": {
+      "additionalProperties": true,
+      "properties": {
+        "data_availability": {
+          "type": "string"
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "identifier": {
+          "type": "string"
+        },
+        "repository": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "data_availability",
+        "repository",
+        "identifier"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    },
+    "source_access": {
+      "items": {
+        "$ref": "#/$defs/source_access"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "source_access",
+    "artifacts"
+  ],
+  "title": "provenance_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_search.schema.json b/skills/research-step/assets/compiled/provenance_search.schema.json
new file mode 100644
index 0000000..8a924d9
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_search.schema.json
@@ -0,0 +1,107 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_source": {
+      "additionalProperties": true,
+      "properties": {
+        "dataset_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "paper_title": {
+          "type": "string"
+        },
+        "paper_url": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "dataset_id",
+        "paper_id",
+        "paper_title",
+        "paper_url"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_search.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_sources": {
+      "items": {
+        "$ref": "#/$defs/data_source"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "data_sources",
+    "artifacts"
+  ],
+  "title": "provenance_search",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/skills/research-step/assets/compiled/provenance_synthesis.schema.json
new file mode 100644
index 0000000..0d43a6f
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_synthesis.schema.json
@@ -0,0 +1,230 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "provenance_report": {
+      "additionalProperties": true,
+      "properties": {
+        "acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "not_acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sources": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "dataset_id",
+              "paper_title",
+              "paper_url",
+              "repository",
+              "access_status",
+              "local_path"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "sources",
+        "method_note",
+        "acquired",
+        "not_acquired",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "provenance_report": {
+      "$ref": "#/$defs/provenance_report"
+    }
+  },
+  "required": [
+    "provenance_report",
+    "artifacts"
+  ],
+  "title": "provenance_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/reproduction.mmd b/skills/research-step/assets/compiled/reproduction.mmd
new file mode 100644
index 0000000..4bb9e6e
--- /dev/null
+++ b/skills/research-step/assets/compiled/reproduction.mmd
@@ -0,0 +1,29 @@
+%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+  law_extraction["law_extraction"]
+  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+  subgraph replication["replication (at replan)"]
+    replication__experiment_design["experiment_design<br/>asta experiment"]
+    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    replication__adjudicate["adjudicate"]
+  end
+  class replication replan
+  reproduction_synthesis["reproduction_synthesis"]
+  data_driven_discovery --> law_extraction
+  law_extraction --> evidence_gathering
+  law_extraction --> replication__experiment_design
+  evidence_gathering --> replication__experiment_design
+  replication__experiment_design --> replication__analysis
+  evidence_gathering --> replication__analysis
+  replication__analysis --> replication__audit
+  replication__experiment_design --> replication__adjudicate
+  replication__analysis --> replication__adjudicate
+  replication__audit --> replication__adjudicate
+  law_extraction --> reproduction_synthesis
+  replication --> reproduction_synthesis
diff --git a/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
new file mode 100644
index 0000000..570e076
--- /dev/null
+++ b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
@@ -0,0 +1,253 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "reproduction_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "laws_ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "outcome",
+              "testability",
+              "effect_size_source",
+              "effect_size_observed",
+              "independence_axes",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_failed_or_untestable": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_held": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "method_note",
+        "laws_ledger",
+        "what_held",
+        "what_failed_or_untestable",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/reproduction_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "reproduction_report": {
+      "$ref": "#/$defs/reproduction_report"
+    }
+  },
+  "required": [
+    "reproduction_report",
+    "artifacts"
+  ],
+  "title": "reproduction_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/testability_triage.schema.json b/skills/research-step/assets/compiled/testability_triage.schema.json
new file mode 100644
index 0000000..8968920
--- /dev/null
+++ b/skills/research-step/assets/compiled/testability_triage.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "testability_triage": {
+      "additionalProperties": true,
+      "properties": {
+        "assessments": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "available_data": {
+                "type": "string"
+              },
+              "gap": {
+                "type": "string"
+              },
+              "proposed_test": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "testable_now",
+              "available_data",
+              "required_data",
+              "proposed_test",
+              "gap"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "testable_theory_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "assessments",
+        "testable_theory_ids"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/testability_triage.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "testability_triage": {
+      "$ref": "#/$defs/testability_triage"
+    }
+  },
+  "required": [
+    "testability_triage",
+    "artifacts"
+  ],
+  "title": "testability_triage",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/theorizer.mmd b/skills/research-step/assets/compiled/theorizer.mmd
new file mode 100644
index 0000000..59e2d0f
--- /dev/null
+++ b/skills/research-step/assets/compiled/theorizer.mmd
@@ -0,0 +1,27 @@
+%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+  subgraph theory_generation["theory_generation"]
+    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+  end
+  testability_triage["testability_triage"]
+  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+  theory_synthesis["theory_synthesis"]
+  ext__adjudicate(["adjudicate (external)"]):::external
+  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
+  ext__evidence_gathering(["evidence_gathering (external)"]):::external
+  ext__law_extraction(["law_extraction (external)"]):::external
+  ext__law_extraction -.-> evidence_extraction
+  ext__adjudicate -.-> evidence_extraction
+  evidence_extraction --> theory_generation__theory_formation
+  theory_generation --> testability_triage
+  ext__data_driven_discovery -.-> testability_triage
+  ext__evidence_gathering -.-> testability_triage
+  testability_triage --> novelty_assessment
+  theory_generation --> theory_synthesis
+  novelty_assessment --> theory_synthesis
+  testability_triage --> theory_synthesis
diff --git a/skills/research-step/assets/compiled/theory_formation.schema.json b/skills/research-step/assets/compiled/theory_formation.schema.json
new file mode 100644
index 0000000..7373cec
--- /dev/null
+++ b/skills/research-step/assets/compiled/theory_formation.schema.json
@@ -0,0 +1,240 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory": {
+      "additionalProperties": true,
+      "properties": {
+        "components": {
+          "additionalProperties": true,
+          "properties": {
+            "generation_objective": {
+              "type": "string"
+            },
+            "new_predictions_likely": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "new_predictions_unknown": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "theory_statements": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "statement_name": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statement": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "statement_name",
+                  "theory_statement",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            },
+            "unaccounted_for": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "text": {
+                    "type": "string"
+                  },
+                  "uuids": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "text",
+                  "uuids"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "generation_objective",
+            "theory_statements",
+            "new_predictions_likely",
+            "new_predictions_unknown",
+            "unaccounted_for"
+          ],
+          "type": "object"
+        },
+        "description": {
+          "type": "string"
+        },
+        "grounds_law_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "objective": {
+          "enum": [
+            "accuracy_focused",
+            "novelty_focused"
+          ]
+        },
+        "supporting_evidence_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "theory_query": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "name",
+        "description",
+        "theory_query",
+        "objective",
+        "grounds_law_ids",
+        "supporting_evidence_ids",
+        "components"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theories": {
+      "items": {
+        "$ref": "#/$defs/theory"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theories",
+    "artifacts"
+  ],
+  "title": "theory_formation",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/theory_synthesis.schema.json b/skills/research-step/assets/compiled/theory_synthesis.schema.json
new file mode 100644
index 0000000..dd2768e
--- /dev/null
+++ b/skills/research-step/assets/compiled/theory_synthesis.schema.json
@@ -0,0 +1,280 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "new_predictions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "novelty_summary": {
+          "type": "string"
+        },
+        "open_threads": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "theories": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "name": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "name",
+              "objective",
+              "one_line",
+              "grounds_law_ids",
+              "novelty",
+              "testable_now",
+              "supporting_evidence_ids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theories",
+        "novelty_summary",
+        "new_predictions",
+        "open_threads",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_report": {
+      "$ref": "#/$defs/theory_report"
+    }
+  },
+  "required": [
+    "theory_report",
+    "artifacts"
+  ],
+  "title": "theory_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/verification_synthesis.schema.json b/skills/research-step/assets/compiled/verification_synthesis.schema.json
new file mode 100644
index 0000000..8d1a639
--- /dev/null
+++ b/skills/research-step/assets/compiled/verification_synthesis.schema.json
@@ -0,0 +1,232 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "verification_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "novelty_by_verification": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "audit_survived": {
+                "type": "boolean"
+              },
+              "claim": {
+                "type": "string"
+              },
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome",
+              "effect_size",
+              "data_used",
+              "audit_survived"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_could_not_be_tested": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_was_tested": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "novelty_by_verification",
+        "what_was_tested",
+        "what_could_not_be_tested",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/verification_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "verification_report": {
+      "$ref": "#/$defs/verification_report"
+    }
+  },
+  "required": [
+    "verification_report",
+    "artifacts"
+  ],
+  "title": "verification_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/schemas.yaml b/skills/research-step/assets/schemas.yaml
index b9643b3..b5ead12 100644
--- a/skills/research-step/assets/schemas.yaml
+++ b/skills/research-step/assets/schemas.yaml
@@ -1,48 +1,105 @@
-version: 1
+version: 2
+
+config:
+  # Session-tunable knobs and their defaults. A mission.md may override any of
+  # them in a `## Config` section (one `key: value` line each). plan's bootstrap
+  # resolves defaults + mission overrides and pins the result on the epic root
+  # (metadata.research_step.config); execute reads the pinned values from the
+  # epic root and passes them into the chain commands. Names match the field the
+  # consuming agent actually takes.
+  n_experiments: 10                # auto-ds: experiments per discovery run; set in the run-metadata
+                                   # JSON given to `asta autodiscovery metadata` (data_driven_discovery
+                                   # fresh runs, cohort_assembly/discovery_run)
+  max_papers_to_retrieve: 30       # generate-theories find-and-extract: papers to extract from
+                                   # (provenance_extraction, evidence_extraction, hypothesis_formation)
+  max_parallel_dv_runs: 5          # cap on concurrent DataVoyager (analyze-data) submissions when a
+                                   # step fans out runs in parallel (holdout_replication, analysis
+                                   # batches); submit up to this many, then wait before submitting more
 
 enums:
-  outcome:               [held, partial, failed, n/a]
+  outcome:               [held, partial, failed, underpowered, n/a]   # the one verdict vocabulary, for laws, theories, and hypotheses
   testability:           [tested, proxy_only, untestable]
   construct_equivalence: [equivalent, proxy, mismatch]
   feasibility:           [feasible, proxy_only, data_unavailable, construct_mismatch]
   independence_axis:     [region, instrument, method, construct, temporal, population]
   generation_objective:  [accuracy_focused, novelty_focused]
-  verification_verdict:  [confirmed, refuted, mixed, inconclusive]
+  subject_kind:          [empirical_law, theory, hypothesis]
   novelty:               [established, derivable, genuinely_new]
-  next_step_kind:        [auto_ds, reproduction, theorizer, evidence_gathering, data_acquisition, verification, analysis, literature_review]
+  support_level:         [supports, mixed, contradicts, inconclusive]
   priority:              [high, medium, low]
   access_status:         [acquired, open_unfetched, restricted, not_found]
-  holdout_verdict:       [held, failed, untested]
 
 types:
 
-  artifact:
-    artifactId: string
+  # Records are immutable: a task emits a record once; later stages never re-emit
+  # it with new values. Verdicts, enrichments, and acquisition results are their
+  # own records referencing the original by id (adjudication -> subject_id,
+  # source_access/acquisition -> data_source_id).
+  #
+  # Agent outputs nest VERBATIM: when a type carries another agent's record
+  # (theory.components, experiment rows, mcts_provenance), the agent's object is
+  # stored unmodified under its key - orchestrator annotations wrap it and never
+  # reach into it or rename its fields - so a real agent payload always slots in.
+  # validate-output.sh deep-validates against the compiled JSON Schemas
+  # (assets/compiled/, regenerated by scripts/compile-schemas.py at build time):
+  # top-level output keys are closed, but nested objects stay open, so extra
+  # nested fields from real payloads are always permitted. A field name ending
+  # in `?` (e.g. mcts_provenance?) is optional; unmarked fields are required.
+
+  # --- Artifacts. The `artifacts` key on every task holds A2A 1.0 Artifacts,
+  # exactly as the spec defines them: an artifact carries an array of typed `parts`
+  # (wire field names, camelCase). A2A artifacts returned by chain commands are
+  # stored as received; locally produced byproducts (a rendered figure, a script,
+  # a data file) are wrapped in the same shape as file parts. Conventions on top
+  # of the spec:
+  #   - agents tag the artifact kind in metadata.type, e.g. extraction-schema |
+  #     extraction | theory | novelty | theory_store (theorizer) ·
+  #     paper-finder-search-result · widget_data_voyager (DV); local byproducts
+  #     use figure | code | data | log | experiment-design.
+  #   - local files are file parts in the *uri* form, uri = repo-root-relative
+  #     path under .asta/<agent>/<slug>/, with a mimeType (image/png,
+  #     text/x-python, text/csv, text/markdown, ...).
+  #   - never put the *bytes* form in output_json - beads caps metadata at ~64KB;
+  #     base64 payloads from agents (e.g. DV figures) are written to disk first
+  #     and referenced by uri.
+  # Byproducts always travel this channel; a thing the contract *requires*
+  # (e.g. an analysis's figures) is a typed output key.
+
+  artifact:                          # A2A 1.0 Artifact, verbatim
+    artifactId: string               # unique within the task (e.g. UUID, or <issue-id>-<n> for local byproducts)
     name: string
     description: string
-    parts: [object]
-    metadata: object
-
-  experiment:
-    experiment_id: string
-    status: string
-    hypothesis: string
+    parts: [part]
+    metadata?: object                # optional; metadata.type carries the artifact kind
+    extensions?: [string]            # optional; URIs of relevant A2A extensions
+
+  part:                              # A2A Part union, discriminated by `kind`
+    kind: string                     # text | file | data
+    metadata?: object                # optional, per part
+    # text: {kind: text, text: string}
+    # file: {kind: file, file: {uri: string, mimeType: string, name: string}}     - the only file form allowed in output_json
+    #       {kind: file, file: {bytes: base64, mimeType: string, name: string}}   - wire/disk only, never in output_json
+    # data: {kind: data, data: object} - structured payloads, stored as received
+
+  figure:                            # the report-embedding form: image is a repo-root-relative path
+    caption: string                  # (PNG/SVG), embedded via ![caption](path)
+    image: string
+
+  experiment:                        # an auto-ds experiments.json record; these four fields are the
+    experiment_id: string            # required projection - paste the full record in unchanged (extras
+    status: string                   # like experiment_plan, code, review, prior/posterior beliefs are
+    hypothesis: string               # permitted and preserved)
     analysis: string
 
-  empirical_law:
-    id: string
+  empirical_law:                     # identity of a discovered law; its verdict lives in the
+    id: string                       # adjudication that references it, never here
     statement: string
     construct: string
     source_operationalization: string
     source_node: string
-    mcts_provenance: {surprisal: number, value: number, visits: number, belief_change: number}
+    effect_size_source: string       # the effect size as the source run/paper claims it
     grouping_rationale: string
-    outcome: outcome                       
-    testability: testability              
-    independence_axes: [independence_axis]
-    effect_size_source: string
-    effect_size_reproduction: string
-    replication_path: string
+    mcts_provenance?: {surprise: number, is_surprising: boolean, prior_belief: object, posterior_belief: object}   # optional; the auto-ds experiment record's search-signal fields, verbatim
 
   dataset:
     id: string
@@ -53,18 +110,25 @@ types:
     variables: [string]
     covers_laws: [string]
 
-  data_source:                       # links a run dataset to the paper and repository it came from
+  data_source:                       # the paper behind a run dataset; emitted once by provenance_search
     id: string
     dataset_id: string               # which run dataset this sources (e.g. ds_alaska_elas)
     paper_id: string                 # source paper (Semantic Scholar sha / corpus id)
     paper_title: string
     paper_url: string
+
+  source_access:                     # provenance_extraction's enrichment, keyed by data_source id
+    data_source_id: string
     data_availability: string        # the paper's data-availability statement, verbatim or summarized
     repository: string               # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA
     identifier: string               # DOI / accession / direct URL for the data
+
+  acquisition:                       # data_acquisition's result, keyed by data_source id
+    data_source_id: string
     access_status: access_status     # acquired | open_unfetched | restricted | not_found
     local_path: string               # repo-root-relative path once acquired (else empty)
-    covers_laws: [string]
+    dataset_id: string               # the dataset registered from this source (empty if not acquired)
+    validation_note: string          # QC against the paper - n, schema/variables, units, missingness - or why not validated
 
   cohort:                            # the data a fresh auto-ds discovery runs against (auto_discovery flow)
     id: string
@@ -77,31 +141,46 @@ types:
     holdout_subset: {definition: string, n: number, path: string}     # independent, held back for replication
     run_id: string                   # the stood-up auto-ds run (autodiscovery create)
 
-  reproduction_design:
-    law_id: string
+  experiment_design:                 # one test, committed before its analysis runs; used by the
+    subject_kind: subject_kind       # replication (law) and testing (hypothesis) branches
+    subject_id: string               # the law / theory / hypothesis under test
     experiment_name: string
     plain_language_description: string
-    original_operationalization: string
+    source_operationalization: string      # how the source measured it (empty for a novel hypothesis)
     independent_operationalization: string
     construct_equivalence: construct_equivalence
     feasibility: feasibility
     required_data: string
     data_gap: string
-
-  analysis:
-    final_answer: string
-    assumptions: [string]
-    figures: [{caption: string, image: string}]
+    experiment_design_query: string  # the natural-language query sent to the experiment designer (input provenance; empty when no designer ran)
+    prespecified:                    # the commitment adjudicate checks the result against
+      test: string                   # the statistical test / model
+      metric: string                 # the quantity that decides it
+      success_threshold: string      # what counts as held, incl. direction; note expected power / min detectable effect if known
+
+  analysis:                          # DataVoyager's TaskSummary, verbatim (figures are hoisted to the
+    final_answer: string             # task's `figures` output key after imageb64 -> PNG conversion)
+    assumptions: string              # a single text block, as the agent emits it
     code: string
 
   audit_report:
-    subject_id: string                     
-    analysis_id: string
-    challenges: [{concern: string, check: string, outcome: string}]
+    subject_id: string               # the law / theory / hypothesis whose analysis was audited
+    challenges: [{concern: string, check: string, outcome: string}]   # include one negative-control check (e.g. shuffled predictor)
     artifacts_found: [string]
     verdict_survives: boolean
     recommended_adjustment: string
 
+  adjudication:                      # the verdict record; references its subject, never mutates it
+    subject_kind: subject_kind
+    subject_id: string
+    outcome: outcome                 # held | partial | failed | underpowered | n/a
+    testability: testability
+    effect_size_observed: string
+    prespecified_check: string       # the observed metric vs the committed success_threshold
+    independence_axes: [independence_axis]
+    data_used: string
+    evidence: string
+
   extracted_data:
     id: string
     run_id: string
@@ -114,15 +193,29 @@ types:
         citation_title: string
         uuid: string
 
+  literature_review:                 # hypothesis_driven_research's survey output
+    summary: string
+    key_findings: [{text: string, uuids: [string]}]
+    open_gaps: [string]              # gaps that motivate hypotheses
+    citations: [{id: string, corpus_id: number, title: string, url: string, relevance: string}]   # corpus_id = canonical S2 corpusId; rows convert mechanically to PaperEntry seeds
+
+  hypothesis:                        # a slim, directly testable claim (hypothesis_driven_research)
+    id: string
+    statement: string
+    rationale: string                # why the literature implies it
+    falsifiable_prediction: string
+    grounds: [{text: string, uuids: [string]}]   # the evidence the rationale rests on
+
   theory:
     id: string
     name: string
     description: string
     theory_query: string
-    objective: generation_objective
-    grounds_law_ids: [string]
-    supporting_evidence_ids: [string]
-    components:
+    objective: generation_objective  # orchestrator annotation (the generation branch); the agent's own copy is components.generation_objective
+    grounds_law_ids: [string]        # orchestrator annotation - which laws ground this theory (no agent equivalent)
+    supporting_evidence_ids: [string]   # orchestrator annotation
+    components:                      # the theorizer's theory record, carried VERBATIM - never flatten or edit
+      generation_objective: string   # the agent's value as emitted (e.g. accuracy-focused)
       theory_statements:
         - statement_name: string
           theory_statement: string
@@ -138,28 +231,24 @@ types:
         testable_now: boolean
         available_data: string
         required_data: string
-        proposed_test: string
+        proposed_test: {test: string, metric: string, success_threshold: string}   # prespecified; the verification branch's adjudicate checks against it
         gap: string
     testable_theory_ids: [string]
 
   theory_evaluation:
     id: string
     theory_id: string
-    novelty: novelty
-    overall_support_or_contradict: string
-    overall_support_or_contradict_explanation: string
-
-  verification:
-    theory_id: string
-    prediction: string
-    verdict: verification_verdict
-    effect_size: string
-    data_used: string
-    audit_survived: boolean
-    analysis_id: string
+    novelty: novelty                 # rollup across statement_evaluations - the most novel statement wins
+    overall_support: support_level
+    overall_support_raw?: string     # the agent's untyped judgment, verbatim (optional)
+    explanation: string
+    statement_evaluations:           # the agent's real granularity - novelty is scored per statement
+      - statement_index: number
+        novelty: novelty
+        explanation: string
 
   next_run_proposal:
-    kind: next_step_kind
+    kind: string                     # any flows: or tasks: key in this file
     title: string
     tests: [string]
     data_needed: string
@@ -167,11 +256,12 @@ types:
     priority: priority
 
   # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report,
-  # theory_report, verification_report), one standalone data-gaps report, and a
-  # theory-led master (research_report). Each carries report_path (the .md deliverable
-  # written first), a title, a one-line headline, a typed body, and `links` back to the
-  # artifacts, tasks, and papers it rests on. Each sub-flow report exposes a local
-  # `gaps` list that gap_synthesis aggregates into the data_gaps_report.
+  # theory_report, verification_report, hypothesis_report, discovery_report), one
+  # standalone data-gaps report, and a theory-led master (research_report). Each
+  # carries report_path (the .md deliverable written first), a title, a one-line
+  # headline, a typed body, and `links` back to the artifacts, tasks, and papers it
+  # rests on. Each sub-flow report exposes a local `gaps` list that gap_synthesis
+  # aggregates into the data_gaps_report.
 
   provenance_report:
     report_path: string
@@ -184,8 +274,10 @@ types:
         repository: string
         access_status: access_status
         local_path: string
+    method_note: string              # how sources were matched and the data merged/validated (e.g. join key, resulting n vs the run's n)
     acquired: [string]
     not_acquired: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -200,11 +292,12 @@ types:
         outcome: outcome
         testability: testability
         effect_size_source: string
-        effect_size_reproduction: string
+        effect_size_observed: string
         independence_axes: [independence_axis]
         evidence: string
     what_held: [string]
     what_failed_or_untestable: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -225,6 +318,7 @@ types:
     novelty_summary: string
     new_predictions: [string]
     open_threads: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -236,12 +330,30 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verdict: verification_verdict
+        outcome: outcome
         effect_size: string
         data_used: string
         audit_survived: boolean
     what_was_tested: string
     what_could_not_be_tested: [string]
+    figures: [figure]
+    gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
+    links: [{label: string, ref: string}]
+
+  hypothesis_report:                 # synthesis output of the hypothesis_driven_research flow
+    report_path: string
+    title: string
+    headline: string
+    question: string                 # the research question from mission.md
+    ledger:
+      - hypothesis_id: string
+        statement: string
+        outcome: outcome
+        effect_size_observed: string
+        evidence: string
+    answer: string                   # what the verdicts say about the question
+    open_questions: [string]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
@@ -256,6 +368,7 @@ types:
         severity: priority
         arose_in: string
     next_steps: [next_run_proposal]
+    figures: [figure]
     links: [{label: string, ref: string}]
 
   research_report:
@@ -267,55 +380,76 @@ types:
       - theory_id: string
         claim: string
         novelty: novelty
-        verification: verification_verdict
+        outcome: outcome
     inference_chain: [{claim: string, chain: [string]}]
     what_was_done: [string]
     sub_reports: [{kind: string, report_path: string, one_line: string}]
     tensions_and_surprises: [{observation: string, where: string, evidence: string}]
+    figures: [figure]                # at least the one decisive figure, embedded in the report
+    links: [{label: string, ref: string}]
 
   discovery_report:                  # synthesis output of the auto_discovery flow
     report_path: string
     title: string
     headline: string
+    run_id: string                   # id of the discovery run; the report header states its cohort sizes
     laws:
       - law_id: string
         statement: string
         surprise: number             # the discovery run's surprise signal for this candidate law
-        holdout_verdict: holdout_verdict   # held | failed | untested (from the held-out replication)
+        outcome: outcome             # from the held-out replication (untested branches are n/a)
         deciding_experiment: string  # the held-out DataVoyager run/analysis that decided the verdict
-        effect_size: string
+        effect_size_discovery: string   # on the discovery subset
+        effect_size_holdout: string     # on the held-out subset - the pair shows replication shrinkage
+    interpretation: string           # what the run means against the question that motivated it
     next_steps: [next_run_proposal]
+    figures: [figure]
     gaps: [{item: string, missing_data: string, blocks: string, severity: priority}]
     links: [{label: string, ref: string}]
 
+# Tasks are pure output contracts: output maps each output_json key to its type,
+# [type] meaning a JSON array of that type. Every task also carries artifacts.
+# A task's inputs are declared per flow step (the same output shape takes
+# different inputs in different flows), under `input:` in the flows below.
+
 tasks:
-  provenance_search:      {input: [],                                     output: [data_source, artifacts]}
-  provenance_extraction:  {input: [data_source],                          output: [extracted_data, data_source, artifacts]}
-  data_acquisition:       {input: [data_source],                          output: [dataset, data_source, artifacts]}
-  provenance_synthesis:   {input: [data_source, dataset],                 output: [provenance_report, artifacts]}
-  data_driven_discovery:  {input: [],                                     output: [experiment, dataset, artifacts]}
-  law_extraction:         {input: [experiment],                           output: [empirical_law, artifacts]}
-  evidence_gathering:     {input: [empirical_law],                        output: [dataset, artifacts]}
-  reproduction_design:    {input: [empirical_law, dataset],               output: [reproduction_design, artifacts]}
-  analysis:               {input: [reproduction_design, dataset],         output: [analysis, artifacts]}
-  reproduction_audit:     {input: [analysis],                             output: [audit_report, artifacts]}
-  reproduce:              {input: [reproduction_design, analysis, audit_report], output: [empirical_law, artifacts]}
-  reproduction_synthesis: {input: [empirical_law],                        output: [reproduction_report, artifacts]}
-  evidence_extraction:    {input: [empirical_law],                        output: [extracted_data, artifacts]}
-  theory_formation:       {input: [extracted_data, empirical_law],        output: [theory, artifacts]}
-  testability_triage:     {input: [theory, dataset],                      output: [testability_triage, artifacts]}
-  novelty_assessment:     {input: [testability_triage, theory],           output: [theory_evaluation, artifacts]}
-  theory_synthesis:       {input: [theory, theory_evaluation, testability_triage], output: [theory_report, artifacts]}
-  theory_audit:           {input: [analysis],                             output: [audit_report, artifacts]}
-  theory_verification:    {input: [theory, analysis, audit_report],        output: [verification, artifacts]}
-  verification_synthesis: {input: [verification, theory_evaluation],       output: [verification_report, artifacts]}
-  gap_synthesis:          {input: [provenance_report, reproduction_report, theory_report, verification_report], output: [data_gaps_report, artifacts]}
-  final_synthesis:        {input: [provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report], output: [research_report, artifacts]}
-  # auto_discovery flow (a distinct top-level epic: source a cohort, run a fresh discovery, replicate on held-out data)
-  cohort_assembly:        {input: [],                      output: [cohort, dataset, artifacts]}
-  discovery_run:          {input: [cohort],                output: [experiment, empirical_law, artifacts]}
-  holdout_replication:    {input: [empirical_law, cohort], output: [empirical_law, artifacts]}
-  discovery_synthesis:    {input: [empirical_law],         output: [discovery_report, artifacts]}
+  provenance_search:      {output: {data_sources: [data_source], artifacts: [artifact]}}
+  provenance_extraction:  {output: {extracted_data: extracted_data, source_access: [source_access], artifacts: [artifact]}}
+  data_acquisition:       {output: {acquisitions: [acquisition], datasets: [dataset], artifacts: [artifact]}}
+  provenance_synthesis:   {output: {provenance_report: provenance_report, artifacts: [artifact]}}
+  data_driven_discovery:  {output: {experiments: [experiment], datasets: [dataset], artifacts: [artifact]}}
+  law_extraction:         {output: {empirical_laws: [empirical_law], artifacts: [artifact]}}
+  evidence_gathering:     {output: {datasets: [dataset], artifacts: [artifact]}}
+  experiment_design:      {output: {experiment_design: experiment_design, artifacts: [artifact]}}
+  analysis:               {output: {analysis: analysis, figures: [figure], artifacts: [artifact]}}
+  audit:                  {output: {audit_report: audit_report, artifacts: [artifact]}}
+  adjudicate:             {output: {adjudication: adjudication, artifacts: [artifact]}}
+  reproduction_synthesis: {output: {reproduction_report: reproduction_report, artifacts: [artifact]}}
+  evidence_extraction:    {output: {extracted_data: extracted_data, artifacts: [artifact]}}
+  theory_formation:       {output: {theories: [theory], artifacts: [artifact]}}
+  testability_triage:     {output: {testability_triage: testability_triage, artifacts: [artifact]}}
+  novelty_assessment:     {output: {theory_evaluations: [theory_evaluation], artifacts: [artifact]}}
+  theory_synthesis:       {output: {theory_report: theory_report, artifacts: [artifact]}}
+  verification_synthesis: {output: {verification_report: verification_report, artifacts: [artifact]}}
+  gap_synthesis:          {output: {data_gaps_report: data_gaps_report, artifacts: [artifact]}}
+  final_synthesis:        {output: {research_report: research_report, artifacts: [artifact]}}
+  # hypothesis_driven_research flow
+  literature_review:      {output: {literature_review: literature_review, artifacts: [artifact]}}
+  hypothesis_formation:   {output: {hypotheses: [hypothesis], artifacts: [artifact]}}
+  hypothesis_synthesis:   {output: {hypothesis_report: hypothesis_report, artifacts: [artifact]}}
+  # auto_discovery flow (its own session in a separate workspace: source a cohort, run a fresh discovery, replicate on held-out data)
+  cohort_assembly:        {output: {cohort: cohort, datasets: [dataset], artifacts: [artifact]}}
+  discovery_run:          {output: {experiments: [experiment], empirical_laws: [empirical_law], artifacts: [artifact]}}
+  holdout_replication:    {output: {adjudication: adjudication, figures: [figure], artifacts: [artifact]}}
+  discovery_synthesis:    {output: {discovery_report: discovery_report, artifacts: [artifact]}}
+
+# Each flow step carries: mission (what the work is), input (the upstream steps
+# in this session whose issues plan wires as the task's inputs), and chain (the
+# asta commands). A node with a chain is a step; a node with only child nodes
+# and a mission is a group; a chain item {workflow: <flow>, mission: <text>}
+# expands the named sub-flow inline. A group whose branches are created at
+# replan (one per law / theory / hypothesis, once the naming step closes)
+# declares `replan: true`.
 
 flows:
 
@@ -334,103 +468,171 @@ flows:
       chain:
         - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.}
     verification:
-      mission: One branch per theory that testability_triage marked testable. There is no design step here - the proposed_test from triage feeds analysis directly. The branch count is known only after triage closes, so these branches are created at replan.
+      mission: One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.
+      replan: true
       analysis:
-        mission: Run the theory's proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets.
+        mission: Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [testability_triage, data_driven_discovery, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_audit:
-        mission: Try to refute the verification analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      theory_verification:
-        mission: Finalize the prediction verdict (confirmed, refuted, mixed, or inconclusive) and its effect size from the analysis and audit.
+      adjudicate:
+        mission: Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.
+        input: [testability_triage, analysis, audit]
         chain: []
     verification_synthesis:
-      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, verdict, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Carry any gaps in `gaps`.
+      mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.
+      input: [verification, novelty_assessment]
       chain: []
     gap_synthesis:
-      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis]
       chain: []
     final_synthesis:
-      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and verification verdict; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, and tensions_and_surprises. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.
+      input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis, gap_synthesis]
       chain: []
 
   data_provenance:
     mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.
     provenance_search:
-      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url) with access_status not yet determined.
+      mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).
+      input: []
       chain: [asta literature find, asta papers search]
     provenance_extraction:
-      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Fill these into each data_source.
+      mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.
+      input: [provenance_search]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     data_acquisition:
-      mission: For each data_source that is openly available, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Set access_status (acquired, open_unfetched, restricted, or not_found) and local_path. For restricted or not-found data, record a gap rather than blocking downstream work.
+      mission: For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.
+      input: [provenance_search, provenance_extraction]
       chain: [asta documents, asta autodiscovery upload]
     provenance_synthesis:
-      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate).
+      mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.
+      input: [provenance_search, provenance_extraction, data_acquisition]
       chain: []
 
   reproduction:
-    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch analysis, not the ingested run.
+    mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.
     data_driven_discovery:
-      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one. Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.
+      input: []
       chain: [asta autodiscovery run, asta autodiscovery experiments]
     law_extraction:
-      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law.
+      mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.
+      input: [data_driven_discovery]
       chain: []
     evidence_gathering:
-      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Emit a dataset registry that tags which laws each dataset can test.
+      mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.
+      input: [law_extraction]
       chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload]
     replication:
       mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.
-      reproduction_design:
-        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility. If feasible or proxy_only, the branch proceeds to analysis. If data_unavailable or construct_mismatch, record the data_gap, finalize the law as outcome n/a and testability untestable, and open a data_acquisition issue that blocks the analysis that would otherwise run.
+      replan: true
+      experiment_design:
+        mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.
+        input: [law_extraction, evidence_gathering]
         chain: [asta experiment]
       analysis:
-        mission: Run the reproduction on the acquired data. Effect size and outcome come from here.
+        mission: Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, evidence_gathering]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduction_audit:
-        mission: Try to refute the analysis or find artifacts before its verdict stands.
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
         chain: [asta analyze-data submit, asta analyze-data poll]
-      reproduce:
-        mission: Finalize the law's two-axis verdict, independence axes, and reproduction effect size from the analysis and audit; or outcome n/a, testability untestable when the branch was infeasible.
+      adjudicate:
+        mission: Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.
+        input: [experiment_design, analysis, audit]
         chain: []
     reproduction_synthesis:
-      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.
+      input: [law_extraction, replication]
       chain: []
 
   theorizer:
     mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.
     evidence_extraction:
-      mission: Shared across both objective branches. Consume the reproduced laws (the empirical_law records reproduce finalized, with outcome and testability filled - not the pre-reproduction candidates from law_extraction). Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      mission: Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.
+      input: [law_extraction, adjudicate]
       chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
     theory_generation:
       mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.
       theory_formation:
         mission: Form theories from the shared extraction store under this branch's objective.
+        input: [evidence_extraction]
         chain: [asta generate-theories form-theory]
     testability_triage:
-      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now, with the proposed_test for each. Theories needing new data carry a gap routed to next_steps.
+      mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.
+      input: [theory_generation, data_driven_discovery, evidence_gathering]
       chain: []
     novelty_assessment:
       mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories.
+      input: [testability_triage]
       chain: [asta generate-theories evaluate-novelty]
     theory_synthesis:
       mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.
+      input: [theory_generation, novelty_assessment, testability_triage]
+      chain: []
+
+  hypothesis_driven_research:
+    mission: Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.
+    literature_review:
+      mission: Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.
+      input: []
+      chain: [asta literature find, asta papers search]
+    hypothesis_formation:
+      mission: Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed the theorizer's `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, and set search_additional_papers false when the corpus should be exactly those seeds.
+      input: [literature_review]
+      chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract]
+    testing:
+      mission: One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.
+      replan: true
+      experiment_design:
+        mission: Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.
+        input: [hypothesis_formation, literature_review]
+        chain: [asta experiment]
+      data_acquisition:
+        mission: Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.
+        input: [experiment_design]
+        chain: [asta documents, asta autodiscovery upload]
+      analysis:
+        mission: Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [experiment_design, data_acquisition]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      audit:
+        mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.
+        input: [analysis]
+        chain: [asta analyze-data submit, asta analyze-data poll]
+      adjudicate:
+        mission: Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.
+        input: [experiment_design, analysis, audit]
+        chain: []
+    hypothesis_synthesis:
+      mission: Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.
+      input: [hypothesis_formation, testing]
       chain: []
 
   auto_discovery:
-    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own top-level epic; the research question (the intent) comes from mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
+    mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.
     cohort_assembly:
-      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.
+      input: []
       chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata]
     discovery_run:
-      mission: Run discovery against the original question with the cohort as data (10 experiments). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      mission: Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.
+      input: [cohort_assembly]
       chain: [asta autodiscovery submit, asta autodiscovery experiments]
     replication:
       mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.
+      replan: true
       holdout_replication:
-        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel. The verdict (held, failed, or untested) comes from this replication, not from the discovery run. Finalize the law's outcome from the held-out result.
+        mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.
+        input: [discovery_run, cohort_assembly]
         chain: [asta analyze-data submit, asta analyze-data poll]
     discovery_synthesis:
-      mission: Fan the branches in. Write discovery_report - give each law its held-out verdict (held, failed, or untested) with the experiment that decided it and its effect size, then propose next_steps. A failed law is a result, not a gap.
+      mission: Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.
+      input: [discovery_run, replication]
       chain: []
diff --git a/skills/research-step/scripts/close-task.sh b/skills/research-step/scripts/close-task.sh
index 673b23f..7535a38 100755
--- a/skills/research-step/scripts/close-task.sh
+++ b/skills/research-step/scripts/close-task.sh
@@ -16,7 +16,8 @@ jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2;
 cur="$(bd show "$id" --json | jq -c '.[0].metadata')"
 merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \
   '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")"
-tmp="$(mktemp)"; printf '%s' "$merged" > "$tmp"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$merged" > "$tmp"
 bd update "$id" --metadata @"$tmp" >/dev/null
 
 # 2. validate structurally (reads the issue back; no style lint)
@@ -28,17 +29,25 @@ bd close "$id" >/dev/null
   || { echo "close-task: $id did not close" >&2; exit 2; }
 echo "closed $id"
 
-# 5. cascade: close each ancestor group whose direct children are all closed
+# 5. cascade: close each ancestor group whose direct children are all closed.
+# The epic root is never closed here — "root open, no open tasks" is the
+# session-complete state that epic-root.sh and the workflows rely on.
 cur_id="$id"
 while [[ "$cur_id" == *.* ]]; do
   parent="${cur_id%.*}"
-  bd show "$parent" --json >/dev/null 2>&1 || break
-  open_kids="$(bd list --json | jq --arg p "$parent" '
+  parent_json="$(bd show "$parent" --json 2>/dev/null)" || break
+  [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break
+  open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" '
     [ .[]
       | select(.id | startswith($p + "."))
       | select((.id[($p|length)+1:] | contains(".")) | not)
       | select(.status != "closed") ] | length')"
   [[ "$open_kids" -eq 0 ]] || break
-  bd close "$parent" >/dev/null 2>&1 && echo "closed group $parent"
+  if bd close "$parent" >/dev/null 2>&1; then
+    echo "closed group $parent"
+  else
+    echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2
+    break
+  fi
   cur_id="$parent"
 done
diff --git a/skills/research-step/scripts/create-task.sh b/skills/research-step/scripts/create-task.sh
index 6024cf6..1e992a9 100755
--- a/skills/research-step/scripts/create-task.sh
+++ b/skills/research-step/scripts/create-task.sh
@@ -5,16 +5,14 @@
 # execute publishes them via close-task.sh. Prints the new issue id.
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; }
 parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5
 
-python3 - "$schemas" "$task_type" <<'PY' || { echo "create-task: unknown task_type '$2' (not in schemas.yaml)" >&2; exit 3; }
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-sys.exit(0 if sys.argv[2] in d["tasks"] else 3)
-PY
+# Validate the task_type against schemas.yaml. The helper exits 3 for an
+# unknown task_type (and prints the known ones) or 5 when the schema cannot
+# be read (e.g. PyYAML missing — run init); set -e propagates either.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 [[ -n "$desc" ]]            || { echo "create-task: a brief description is required" >&2; exit 4; }
 [[ "$desc" != *$'\n'* ]]    || { echo "create-task: description must be one line" >&2; exit 4; }
@@ -22,6 +20,7 @@ PY
 
 if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi
 meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \
-  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 1, output_json: null, output_markdown: null}}')"
-tmp="$(mktemp)"; printf '%s' "$meta" > "$tmp"
+  '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')"
+tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT
+printf '%s' "$meta" > "$tmp"
 bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent
diff --git a/skills/research-step/scripts/epic-root.sh b/skills/research-step/scripts/epic-root.sh
index 13a7dfd..c176ef0 100755
--- a/skills/research-step/scripts/epic-root.sh
+++ b/skills/research-step/scripts/epic-root.sh
@@ -33,7 +33,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-ids=$(bd list --json | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
+ids=$(bd list --json --limit 0 | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id')
 count=$(printf '%s' "$ids" | grep -c . || true)
 
 case "$count" in
diff --git a/skills/research-step/scripts/next-task.sh b/skills/research-step/scripts/next-task.sh
new file mode 100755
index 0000000..97e3592
--- /dev/null
+++ b/skills/research-step/scripts/next-task.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# next-task.sh — the single definition of task ordering. Prints the open task
+# issues (status == open, metadata.research_step.task_type set), sorted
+# *numerically* by hierarchical id (wf.1.2 before wf.1.10 — a plain lexical
+# sort would get this wrong past 9 siblings). Groups (no task_type) are never
+# listed; there are no dependency edges, so this order is the ordering signal.
+#
+# Used by execute (pick the next task) and update-summary (render the queue),
+# so the two never disagree about what runs next.
+#
+# Output (stdout, key: value lines):
+#   next:  <bd-id> | none
+#   queue: <space-separated bd-ids>   (omitted when empty)
+# Exit: 0 (even when next: none) · 3 bd/jq missing
+set -euo pipefail
+
+command -v bd >/dev/null 2>&1 || { echo "next-task: 'bd' not found on PATH" >&2; exit 3; }
+command -v jq >/dev/null 2>&1 || { echo "next-task: 'jq' not found on PATH" >&2; exit 3; }
+
+ids="$(bd list --json --limit 0 | jq -r '
+  [ .[]
+    | select(.status == "open")
+    | select(.metadata.research_step.task_type != null) ]
+  | sort_by(.id | split(".") | map(tonumber? // .))
+  | .[].id')"
+
+if [[ -z "$ids" ]]; then  # no open issue carries a task_type
+  echo "next: none"
+  exit 0
+fi
+
+echo "next: $(head -n1 <<<"$ids")"  # first id in the sorted order
+rest="$(tail -n +2 <<<"$ids" | tr '\n' ' ' | sed 's/ $//')"  # remaining ids on one line, trailing space stripped
+[[ -n "$rest" ]] && echo "queue: $rest" || true  # `|| true` keeps the exit status 0 when the queue is empty
diff --git a/skills/research-step/scripts/summary-check.sh b/skills/research-step/scripts/summary-check.sh
index 8d98b65..6a14470 100755
--- a/skills/research-step/scripts/summary-check.sh
+++ b/skills/research-step/scripts/summary-check.sh
@@ -30,7 +30,7 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 3
 fi
 
-current=$(bd list --json \
+current=$(bd list --json --limit 0 \
   | jq -r '.[] | select(.status != "closed") | .id' \
   | sort \
   | shasum -a 256 \
diff --git a/skills/research-step/scripts/task-output-keys.sh b/skills/research-step/scripts/task-output-keys.sh
new file mode 100755
index 0000000..ef1269b
--- /dev/null
+++ b/skills/research-step/scripts/task-output-keys.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# task-output-keys.sh <task_type> — print the space-separated output keys for a
+# task from assets/schemas.yaml. The single schema reader for scripts:
+# create-task.sh and validate-output.sh call it to validate a task_type.
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema (python3 or
+#       PyYAML missing, schemas.yaml unreadable or malformed — run init)
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; }
+command -v python3 >/dev/null 2>&1 || { echo "task-output-keys: 'python3' not found on PATH - run the init workflow" >&2; exit 5; }
+
+python3 - "$schemas" "$1" <<'PY'
+import sys
+
+try:
+    import yaml
+except ImportError:
+    print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+try:  # an empty/non-mapping document or a task without `output` is a schema error (exit 5), not a traceback
+    with open(sys.argv[1]) as f:
+        tasks = yaml.safe_load(f).get("tasks") or {}
+    t = tasks.get(sys.argv[2])
+    keys = None if t is None else " ".join(t["output"])
+except Exception as e:
+    print(f"task-output-keys: cannot read {sys.argv[1]}: {type(e).__name__}: {e}", file=sys.stderr)
+    sys.exit(5)
+
+if keys is None:
+    print(f"task-output-keys: unknown task_type '{sys.argv[2]}'", file=sys.stderr)
+    print(f"task-output-keys: known: {' '.join(sorted(tasks))}", file=sys.stderr)
+    sys.exit(3)
+print(keys)
+PY
diff --git a/skills/research-step/scripts/validate-output.sh b/skills/research-step/scripts/validate-output.sh
index af3b8f6..69530f9 100755
--- a/skills/research-step/scripts/validate-output.sh
+++ b/skills/research-step/scripts/validate-output.sh
@@ -1,12 +1,16 @@
 #!/usr/bin/env bash
 # validate-output.sh <issue-id> — structural check of a task's stored output_json.
-# Reads the issue from beads, compiles assets/schemas.yaml, and checks that
-# metadata.research_step.output_json holds exactly tasks.<task_type>.output (incl. artifacts).
-# No style or quality linting.
-# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task · 4 output_json mismatch
+# Reads the issue from beads and deep-validates metadata.research_step.output_json
+# against the compiled JSON Schema (assets/compiled/<task_type>.schema.json,
+# regenerated from schemas.yaml by scripts/compile-schemas.py at build time):
+# top-level keys closed, declared nested fields required, extra nested fields
+# permitted (payloads nest verbatim). No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task
+#       · 4 schema violation
+#       · 5 schema unreadable (PyYAML/jsonschema missing or compiled schema
+#         absent — run the init workflow, or update the plugin)
 set -euo pipefail
 here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-schemas="$here/../assets/schemas.yaml"
 
 [[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
 id="$1"
@@ -16,28 +20,46 @@ rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // e
 task_type="$(jq -r '.task_type // empty' <<<"$rs")"
 [[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
 
-expected="$(python3 - "$schemas" "$task_type" <<'PY'
-import yaml, sys
-d = yaml.safe_load(open(sys.argv[1]))
-t = d["tasks"].get(sys.argv[2])
-if t is None: sys.exit(3)
-print(" ".join(t["output"]))
-PY
-)" || { echo "validate-output: unknown task '$task_type' (not in schemas.yaml)" >&2; exit 3; }
+# Exits 3 (unknown task_type) or 5 (schema unreadable) with its own message.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
 got="$(jq -c '.output_json // empty' <<<"$rs")"
 [[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
 
-for k in $expected; do
-  jq -e --arg k "$k" 'has($k)' <<<"$got" >/dev/null \
-    || { echo "validate-output: output_json missing '$k' for '$task_type'" >&2; exit 4; }
-done
-while IFS= read -r k; do
-  case " $expected " in *" $k "*) ;; *)
-    echo "validate-output: output_json.$k is not in the '$task_type' schema — byproducts go in artifacts" >&2; exit 4 ;;
-  esac
-done < <(jq -r 'keys[]' <<<"$got")
-jq -e '.artifacts | type == "array"' <<<"$got" >/dev/null \
-  || { echo "validate-output: output_json.artifacts must be an array" >&2; exit 4; }
+schema="$here/../assets/compiled/${task_type}.schema.json"
+[[ -r "$schema" ]] || {
+  echo "validate-output: compiled schema missing for '$task_type' ($schema) — update the plugin (it is regenerated at build time)" >&2
+  exit 5
+}
+OUTPUT_JSON="$got" python3 - "$schema" "$task_type" <<'PY'
+import json
+import os
+import sys
+
+try:  # import the class by name: jsonschema < 4.0 imports fine but lacks it, and must exit 5 rather than crash later
+    from jsonschema import Draft202012Validator
+except ImportError:
+    print("validate-output: python3 cannot import jsonschema.Draft202012Validator (needs jsonschema >= 4.0) - run the init workflow or upgrade jsonschema", file=sys.stderr)
+    sys.exit(5)
+
+with open(sys.argv[1]) as f:
+    schema = json.load(f)
+data = json.loads(os.environ["OUTPUT_JSON"])  # output_json is handed over via the environment by the shell wrapper
+
+validator = Draft202012Validator(schema)
+errors = sorted(validator.iter_errors(data), key=lambda e: list(map(str, e.absolute_path)))  # ordered by location in output_json
+if errors:
+    for e in errors[:5]:  # print at most five violations; the rest are summarized below
+        path = ".".join(str(p) for p in e.absolute_path)
+        where = f"output_json.{path}" if path else "output_json"
+        hint = ""
+        if e.validator == "additionalProperties" and not path:  # undeclared key at the top level
+            hint = " - byproducts go in artifacts"
+        print(f"validate-output: {where}: {e.message}{hint}", file=sys.stderr)
+    if len(errors) > 5:
+        print(f"validate-output: ... and {len(errors) - 5} more schema violation(s)", file=sys.stderr)
+    print(f"validate-output: output_json does not satisfy the '{sys.argv[2]}' schema", file=sys.stderr)
+    sys.exit(4)
+PY
 
 echo "ok"
diff --git a/skills/research-step/workflows/brainstorm.md b/skills/research-step/workflows/brainstorm.md
index 250ba36..6a9bbf6 100644
--- a/skills/research-step/workflows/brainstorm.md
+++ b/skills/research-step/workflows/brainstorm.md
@@ -25,27 +25,27 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m
 Pick the branch that matches; do not run more than one.
 
 - **No `mission.md`** → help the user draft one.
-  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)** from `assets/schemas.yaml` (each flow's purpose is in its `mission` field): `theorizer`, `reproduction`, `hypothesis_driven_research`, or a custom chain of tasks. A session may run more than one. Record the chosen flow(s) in `mission.md` so `plan` can read them. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
+  Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)**: open `assets/schemas.yaml` and enumerate the keys under `flows:` — do **not** offer flows from memory; the file is the only source of the list, and each flow's purpose is in its `mission` field. A custom chain of `tasks:` entries is also an option. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so `plan` can read them. Also surface the session **config knobs** (the `config:` section of `assets/schemas.yaml`, e.g. `n_experiments`, `max_papers_to_retrieve`) with their defaults; record any non-default choices in a `## Config` section of `mission.md` (one `key: value` line each) — `plan` pins the resolved config on the epic at bootstrap. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**.
 
 - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session.
 
-- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / ready counts plus the single most-relevant ready task) and ask what they want to do next.
+- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / open-task counts plus the next task from `scripts/next-task.sh`) and ask what they want to do next.
 
 ### 3. Answer questions, preferring `summary.md`
 
-`summary.md` is the synthesized view of the session — mission, scope, definitions, related work, hypotheses, results, open questions, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
+`summary.md` is the synthesized view of the session — mission, flow(s), results so far (report headlines), gaps, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current.
 
-**Default path: read `summary.md`.** For most questions ("what's the current scope?", "which hypotheses are open?", "what's blocking progress?", "what's the state of H2?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
+**Default path: read `summary.md`.** For most questions ("which laws held?", "what theories came out?", "what's blocking progress?", "what's next?"), the answer is already in this file. Read it first; quote or summarize the relevant section.
 
 **Drop down to beads only when the digest doesn't have the answer.** `summary.md` summarizes; some questions need the raw outputs:
 
 | Need | Query                                                                                                  |
 |---|--------------------------------------------------------------------------------------------------------|
 | Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` |
-| Full open-issue metadata (rare; usually the digest covers it) | `bd list` |
-| Task tree | `bd list --json` — ids encode the parent-child outline |
-| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output_json.summary_path` referenced from the digest |
-| Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number)                 |
+| Full issue metadata (rare; usually the digest covers it) | `bd list --all --limit 0` |
+| Task tree | `bd list --json --all --limit 0` — ids encode the parent-child outline |
+| Long-form content behind a report | follow `report_path` (or any `_path` field) from the issue's `output_json` |
+| Exact verdict / effect size for a law, theory, or hypothesis | `bd show <adjudicate-id> --json` (the adjudication record; the digest reports headlines, not the numbers) |
 
 Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`.
 
diff --git a/skills/research-step/workflows/execute.md b/skills/research-step/workflows/execute.md
index a8596e2..b4ba1ef 100644
--- a/skills/research-step/workflows/execute.md
+++ b/skills/research-step/workflows/execute.md
@@ -9,23 +9,33 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr
 
 ## Steps
 
-1. **Pick a task.** If a task ID was supplied, use it. Else pick the **open issue that has a `task_type` and the smallest hierarchical id** — `bd list --json`, keep `status == open` with `metadata.research_step.task_type != null`, sort by id, take the first. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
-2. **Claim it.** `bd update <id> --status=in_progress`.
-3. **Load the schema.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>`; find the step by its `task_type` inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.reproduction_design`) — and use its `mission` and `chain`.
-4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
-5. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
-   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`) plus `artifacts`, and nothing else; derived or operational values (a verdict, an execution id, artifact paths) go in `artifacts`, not the typed fields. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
-   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key. Reference artifacts, papers (canonical Semantic Scholar `/paper/<sha>` URLs), and deciding tasks by link where it helps a reader. This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
-6. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal `tasks.<task_type>.output` plus `artifacts`; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed. A non-zero exit leaves the issue `in_progress` — fix and re-run. The `description` is untouched; it stays the brief one-liner set at creation.
-7. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+1. **Pick a task.** If a task ID was supplied, use it. Else run `scripts/next-task.sh` and take the `next:` id — it is the single definition of ordering (open issues with a `task_type`, numerically sorted by hierarchical id; `update-summary` renders the same order). `next: none` ⇒ report that and stop. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal.
+2. **Check readiness.** For every issue id in this task's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), verify it is `closed` with a non-null `output_json`. If any input is not ready, **stop and report it** — the graph was built out of order (a task left `in_progress`, or a replan misordering); do not improvise the missing input. This is the readiness check that dependency edges used to provide.
+3. **Claim it.** `bd update <id> --status=in_progress`.
+4. **Load the schema and config.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>.output` (a mapping of key → type; `[type]` means a JSON array of that type); find the step inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.experiment_design`) — and use its `mission`, `input`, and `chain`. Read the **session config** pinned on the epic root (`bd show <epic-id> --json | jq '.[0].metadata.research_step.config'`) and pass its values into the chain where they apply — `n_experiments` into the run-metadata JSON for `asta autodiscovery metadata`, `max_papers_to_retrieve` on `asta generate-theories find-and-extract`. Do not re-read defaults from schemas.yaml mid-session; the pin is the truth. (Some sessions were bootstrapped before config pinning existed and have no pin; when the pin is absent, use the schemas.yaml defaults.)
+5. **Gather inputs.** For every issue listed in this issue's `inputs`, read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state.
+6. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things:
+   - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`), and nothing else. Fill every typed field the schema declares (including typed verdicts like `adjudication.outcome` or `audit_report.verdict_survives`); only values with **no typed field** (an execution id, intermediate file paths, raw tool output) go in `artifacts`. Artifact rows are **A2A 1.0 Artifacts** — `{artifactId, name, description, parts, metadata}`, where `parts` is an array of text / file / data parts (see `artifact` and `part` in the schema). Artifacts returned by chain commands are stored as received (their kind in `metadata.type`); locally produced byproducts (a figure, a script, a data file) are wrapped as file parts in the uri form — repo-root-relative path plus mimeType — never the bytes form (beads' ~64KB cap). Records are immutable — emit verdicts and enrichments as their own records referencing the original by id (`adjudication.subject_id`, `source_access.data_source_id`); never re-emit an upstream record with changed values. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON.
+   - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key, following the **Report conventions** below (entity hyperlinks, tables, figures). This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files.
+7. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal the keys of `tasks.<task_type>.output` — which always include `artifacts` — none null; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed (it never closes the epic root — the session-complete state is root open with no open tasks). A non-zero exit **before** the `closed <id>` line means the issue is still `in_progress` — fix and re-run. A warning **after** `closed <id>` means the task closed but a group could not be auto-closed; close that group manually. The `description` is untouched; it stays the brief one-liner set at creation.
+8. **Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly.
+
+## Report conventions
+
+These apply to every `output_markdown` and to every `*_synthesis` report deliverable. Rigorous but not over the top: a report stays roughly 50–100 lines; the detail behind it lives in artifacts it links to.
+
+- **Every named entity is a hyperlink.** Papers → DOI or canonical Semantic Scholar URL; datasets and result files → relative path; runs/experiments → their artifact or metadata file; laws/theories/hypotheses → their ledger row, written with an anchor (`<a id="l1"></a>`) so other reports can deep-link (`reproduction_report.md#l1`). A named thing with no link is a defect.
+- **Tables are the spine.** Any ledger, matrix, or catalog (laws × outcomes, theories × verdicts, sources × access) is a table with one row per record, mirroring the typed rows in `output_json`.
+- **Figures carry the quantitative claims.** Embed each one (`![caption](path)`) where the claim is made and list it in the `figures` output field. Analysis-type tasks must emit at least one figure; synthesis reports embed the figures their headline rests on (effect-size comparisons, verdict panels, discovery-vs-holdout shrinkage).
+- Neutral, third-person register; numbers in the text match the tables they summarize.
 
 ## Notes on output
 
-The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`).
+The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`). `output_json.artifacts` holds A2A Artifacts whose file parts reference those paths by uri; heavy payloads (base64 bytes, raw agent JSON) stay on disk, never inline.
 
 Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`:
 
-- `report_path` (from every synthesis report — `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `gap_synthesis`, `final_synthesis`) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `data_gaps_report.md`).
+- `report_path` (from every `*_synthesis` report) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `hypothesis_report.md`, `data_gaps_report.md`).
 
 If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it.
 
diff --git a/skills/research-step/workflows/init.md b/skills/research-step/workflows/init.md
index fd11be3..408c60f 100644
--- a/skills/research-step/workflows/init.md
+++ b/skills/research-step/workflows/init.md
@@ -1,6 +1,6 @@
 # Workflow: init
 
-Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
+Bootstrap the environment for a research session: install `bd`, `jq`, PyYAML, and jsonschema, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready.
 
 After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier.
 
@@ -32,12 +32,16 @@ Server mode (`bd init --server`) is out of scope: it requires running a Dolt sql
    - If no Dolt refs exist on the remote, surface the situation to the user with three options: (a) `bd import .beads/issues.jsonl` (fast, but discards Dolt history and any state newer than the export), (b) configure a Dolt remote and `bd dolt push` from another machine that has the live DB, then retry, (c) abort.
    - Pick one path only after explicit user confirmation. Never auto-import.
 
-4. **Verify the staleness check works.**
+4. **Ensure `python3` can import `yaml` (PyYAML) and `jsonschema`.** `scripts/task-output-keys.sh` (used by `create-task.sh` and `validate-output.sh`) parses `assets/schemas.yaml` with PyYAML; `validate-output.sh` deep-validates each task's `output_json` against the compiled schemas in `assets/compiled/` with jsonschema, and hard-fails (exit 5) without it.
+   - Probe with `python3 -c 'import yaml, jsonschema'`. If it succeeds, skip.
+   - Otherwise install what's missing: `python3 -m pip install --user pyyaml jsonschema` (or the platform equivalent, e.g. `apt-get install python3-yaml python3-jsonschema`). Re-probe; if it still fails, abort and ask the user.
+
+5. **Verify the staleness check works.**
    - Run `scripts/summary-check.sh`. It hashes the sorted IDs of currently-open issues and compares against `summary.md`'s frontmatter. Backend-agnostic — beads can use whichever storage it likes.
    - Requires `jq` on PATH; if missing, install it (`brew install jq`, `apt-get install jq`, etc.) and retry.
    - At init time `summary.md` does not yet exist, so the script will print `status: missing` and exit 1 — that's fine; **update-summary** will create the file later. `status: no-tools` (exit 3) means abort and ask the user.
 
-5. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
+6. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**.
 
 ## Cross-machine transfer
 
diff --git a/skills/research-step/workflows/plan.md b/skills/research-step/workflows/plan.md
index a000e2d..444ee90 100644
--- a/skills/research-step/workflows/plan.md
+++ b/skills/research-step/workflows/plan.md
@@ -23,10 +23,10 @@ The flow in `assets/schemas.yaml` is an indented outline, and the beads graph yo
 
 Reading a flow node:
 
-- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`.
-- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission` and `chain` are never nodes.
+- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`. Its `input:` names the upstream steps in this session whose issues you wire as the task's `inputs` (the same task type takes different inputs in different flows, so inputs live on the step, not the task).
+- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission`, `input`, and `chain` are never nodes.
 - A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree.
-- A **fan-out group** (`replication`, `theory_generation`, `verification`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
+- A **fan-out group** (`replication`, `theory_generation`, `verification`, `testing`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on.
 
 The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks):
 
@@ -38,10 +38,10 @@ wf                      [epic]    <mission>
   wf.1.3                          evidence_gathering
   wf.1.4                [fan-out] replication            one branch per law
    wf.1.4.1             [branch]  <law>
-    wf.1.4.1.1                    reproduction_design
+    wf.1.4.1.1                    experiment_design
     wf.1.4.1.2                    analysis
-    wf.1.4.1.3                    reproduction_audit
-    wf.1.4.1.4                    reproduce
+    wf.1.4.1.3                    audit
+    wf.1.4.1.4                    adjudicate
    wf.1.4.2             [branch]  <law> …
   wf.1.5                          reproduction_synthesis
 ```
@@ -50,35 +50,37 @@ The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproductio
 
 ## Ordering and closing (no edges)
 
-- **Next task = the open issue with a `task_type` and the smallest id.** Groups (no `task_type`) are never executed.
+- **Next task = the `next:` line of `scripts/next-task.sh`** (open issues with a `task_type`, **numerically** sorted by hierarchical id — `wf.1.2` before `wf.1.10`). Groups (no `task_type`) are never executed. `execute` and `update-summary` both use this script, so they never disagree about what runs next.
 - Because you create in execution order, sequential steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last.
-- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. Never close groups by hand.
+- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. It never closes the **epic root**: "root open, no open tasks" is the session-complete state. Never close groups by hand.
 
 ## Static vs data-dependent fan-outs
 
 - **Static** (`theory_generation` by objective): both branches are known up front → create them together.
-- **Data-dependent** (`replication` per law, `verification` per testable theory): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
+- **Data-dependent** (`replication` per law, `verification` per testable theory, `testing` per hypothesis): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`, `hypothesis_formation`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it.
 
 ## Gates (replan)
 
-- When `reproduction_design` closes: `feasibility` of `feasible`/`proxy_only` → create `analysis`, `reproduction_audit`, `reproduce` under that branch; `data_unavailable`/`construct_mismatch` → create only `reproduce` (it records the law `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created.
+- When `experiment_design` closes (in a `replication` or `testing` branch): if `feasibility` is `feasible` or `proxy_only` → create the branch's remaining steps in order — `analysis`, `audit`, `adjudicate` — and, in a `testing` branch only, create `data_acquisition` ahead of them when the design names data not yet in hand. If `feasibility` is `data_unavailable` or `construct_mismatch` → create only `adjudicate` (it records `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap; no `analysis` is created in that case.
 - When `testability_triage` closes: create a `verification` branch only per theory in `testable_theory_ids`; the rest become `next_steps` in the final report.
+- When `hypothesis_formation` closes: create one `testing` branch per hypothesis.
 
 ## Bootstrap
 
 1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear.
-2. `bd create -t epic` the root from the mission, tagged `epic_root: true` + the flow. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
-3. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
-4. Report the epic id, the flow, the loop/group ids, and the frontier task ids.
+2. **Resolve the session config.** Start from the `config:` defaults in `assets/schemas.yaml`; apply any overrides from a `## Config` section in `mission.md` (one `key: value` line each; unknown keys are an error — surface them). The resolved map is pinned in the next step and never re-resolved mid-session.
+3. `bd create -t epic` the root from the mission, tagged with metadata `{"research_step": {"epic_root": true, "flow": "<flow>", "config": {<resolved config>}}}`. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation.
+4. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close.
+5. Report the epic id, the flow, the resolved config, the loop/group ids, and the frontier task ids.
 
 ## Replan
 
 When a step closes, create the next node(s) under their parent, in flow order:
 
-- Create each step with `create-task.sh` (its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling).
-- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the group's steps under each via `create-task.sh` (record why for any branch the data can't support, rather than skipping it).
-- Apply the **Gates** rules above.
-- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape (provenance_report, reproduction_report, theory_report, verification_report, data_gaps_report, research_report).
+- Create each step with `create-task.sh`. Its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling; the step's `input:` list in `schemas.yaml` names **which** upstream steps to wire.
+- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the branch steps under each via `create-task.sh` — **but a gated group lays only the steps up to its gate**: under a `replication` or `testing` branch create only `experiment_design`; the **Gates** rule above creates the rest when that `experiment_design` closes. Ungated branches (`verification`: `analysis`, `audit`, `adjudicate`; `theory_generation`: `theory_formation`) get all their steps at branch creation. Record why for any branch the data can't support, rather than skipping it.
+- Apply the **Gates** rules above — they are the only creator of post-gate steps, so nothing is double-created.
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `hypothesis_synthesis`, `discovery_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape.
 
 Stop at the end of the flow. If the closed step has nothing downstream, report no-op.
 
diff --git a/skills/research-step/workflows/update-summary.md b/skills/research-step/workflows/update-summary.md
index 311c81a..a96a9fa 100644
--- a/skills/research-step/workflows/update-summary.md
+++ b/skills/research-step/workflows/update-summary.md
@@ -15,12 +15,11 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both).
 
 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`.
-3. **Gather state inline.** Everything comes from `bd list --json`:
-   - the full tree (issue_count, status partition);
-   - the **open issues that have a `task_type`, sorted by id** — the first is the next task, the rest are the queue. This replaces `bd ready`; there are no edges, so id order is the ordering signal.
-   Project to `{id, task_type: .metadata.research_step.task_type, title}` and partition by `.status`.
+3. **Gather state inline.**
+   - `bd list --json --all --limit 0` for the full tree — `--all` because closed issues carry the results, `--limit 0` because bd truncates at 50 rows by default. Project to `{id, task_type: .metadata.research_step.task_type, title, status}` and partition by `.status`.
+   - `scripts/next-task.sh` for the **next task and the queue** (open task-type issues, numerically sorted by id — the same order `execute` uses). This replaces `bd ready`; there are no edges, so id order is the ordering signal.
 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`.
-5. **Overwrite `summary.md`** using this template:
+5. **Overwrite `summary.md`** using this template (sections come from the **new taxonomy** — flows, laws, theories, reports — not from any per-flow hardcoding; render what the closed tasks' `output_json` actually contains):
 
    ```markdown
    ---
@@ -28,7 +27,7 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    beads_epic: <bd-id>
    generated_at: <ISO-8601 UTC>
    issue_count: <n>
-   ready_count: <n>
+   open_task_count: <n>
    ---
 
    # <mission title>
@@ -36,36 +35,29 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is
    ## Mission
    <verbatim mission.md, or one-paragraph summary if long>
 
-   ## Research Question & Scope
-   <from scope issue's output, or "pending" if not yet closed>
+   ## Flow
+   <one line per flow this session runs (from task metadata `flow`), with where it
+   stands — e.g. "reproduction — replication branches 2/5 closed, synthesis pending">
 
-   ## Operational Definitions
-   <from definitions issue's output>
+   ## Results so far
+   <one subsection per closed `*_synthesis` task: the report's `headline` plus a link
+   to its `report_path`. Before any synthesis has closed, instead give one bullet per
+   closed task: "<bd-id> [<task_type>]: <one-line outcome from output_json>" — e.g.
+   laws extracted, datasets acquired, theories formed, verdicts finalized.>
 
-   ## Related Work
-   <literature_review.output.key_findings as bullets; link to summary_path>
-
-   ## Hypotheses
-   <one subsection per hypothesis issue: "H_n: <statement>" plus current verdict from its analysis if closed>
-
-   ## Experimental Designs
-   <one subsection per experiment_design, grouped under its hypothesis>
-
-   ## Results Summary
-   <table: hypothesis | verdict | confidence | analysis-id>
-
-   ## Open Questions
-   <synthesis.output.open_questions if synthesis exists, else aggregated from in-flight notes>
+   ## Gaps
+   <the `gaps` rows from closed report outputs (item — missing_data — severity),
+   or "none recorded">
 
    ## Status
    - Closed: <n>
    - In progress: <n> — IDs: <list>
-   - Open tasks: <n> — next: <smallest-id>; queue: <list of remaining open task ids>
+   - Open tasks: <n> — next: <`next:` from next-task.sh>; queue: <`queue:` line>
 
    ### Next Steps
-   <the open task-type issues sorted by id; lead with the next (smallest id), one bullet each:
+   <the queue from next-task.sh in order, one bullet each:
    "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>".
-   If there are no open task issues, write "No open tasks — flow complete.">
+   If next-task.sh prints `next: none`, write "No open tasks — flow complete.">
    ```
 
 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.)

From 17915045848def75305b9016fb97e15d84158275 Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Fri, 12 Jun 2026 16:34:35 -0700
Subject: [PATCH 5/6] research-step: drop compiled assets from the repo

assets/compiled/ is generated from schemas.yaml by the schema compiler at
build time; keep the source of truth only.
---
 .../assets/compiled/adjudicate.schema.json    |  144 -
 .../assets/compiled/analysis.schema.json      |  119 -
 .../assets/compiled/audit.schema.json         |  127 -
 .../assets/compiled/auto_discovery.mmd        |   18 -
 .../compiled/cohort_assembly.schema.json      |  206 -
 .../compiled/data_acquisition.schema.json     |  161 -
 ..._literature_grounded_theory_generation.mmd |   92 -
 .../data_driven_discovery.schema.json         |  152 -
 .../assets/compiled/data_provenance.mmd       |   16 -
 .../assets/compiled/discovery_run.schema.json |  170 -
 .../compiled/discovery_synthesis.schema.json  |  271 -
 .../compiled/evidence_extraction.schema.json  |  132 -
 .../compiled/evidence_gathering.schema.json   |  121 -
 .../compiled/experiment_design.schema.json    |  162 -
 .../compiled/final_synthesis.schema.json      |  289 -
 .../research-step/assets/compiled/flows.json  | 6657 -----------------
 .../assets/compiled/gap_synthesis.schema.json |  221 -
 .../compiled/holdout_replication.schema.json  |  167 -
 .../compiled/hypothesis_driven_research.mmd   |   29 -
 .../compiled/hypothesis_formation.schema.json |  126 -
 .../compiled/hypothesis_synthesis.schema.json |  224 -
 .../compiled/law_extraction.schema.json       |  139 -
 .../compiled/literature_review.schema.json    |  150 -
 .../compiled/novelty_assessment.schema.json   |  147 -
 .../provenance_extraction.schema.json         |  163 -
 .../compiled/provenance_search.schema.json    |  107 -
 .../compiled/provenance_synthesis.schema.json |  230 -
 .../assets/compiled/reproduction.mmd          |   29 -
 .../reproduction_synthesis.schema.json        |  253 -
 .../compiled/testability_triage.schema.json   |  144 -
 .../assets/compiled/theorizer.mmd             |   27 -
 .../compiled/theory_formation.schema.json     |  240 -
 .../compiled/theory_synthesis.schema.json     |  280 -
 .../verification_synthesis.schema.json        |  232 -
 .../assets/compiled/adjudicate.schema.json    |  144 -
 .../assets/compiled/analysis.schema.json      |  119 -
 .../assets/compiled/audit.schema.json         |  127 -
 .../assets/compiled/auto_discovery.mmd        |   18 -
 .../compiled/cohort_assembly.schema.json      |  206 -
 .../compiled/data_acquisition.schema.json     |  161 -
 ..._literature_grounded_theory_generation.mmd |   92 -
 .../data_driven_discovery.schema.json         |  152 -
 .../assets/compiled/data_provenance.mmd       |   16 -
 .../assets/compiled/discovery_run.schema.json |  170 -
 .../compiled/discovery_synthesis.schema.json  |  271 -
 .../compiled/evidence_extraction.schema.json  |  132 -
 .../compiled/evidence_gathering.schema.json   |  121 -
 .../compiled/experiment_design.schema.json    |  162 -
 .../compiled/final_synthesis.schema.json      |  289 -
 .../research-step/assets/compiled/flows.json  | 6657 -----------------
 .../assets/compiled/gap_synthesis.schema.json |  221 -
 .../compiled/holdout_replication.schema.json  |  167 -
 .../compiled/hypothesis_driven_research.mmd   |   29 -
 .../compiled/hypothesis_formation.schema.json |  126 -
 .../compiled/hypothesis_synthesis.schema.json |  224 -
 .../compiled/law_extraction.schema.json       |  139 -
 .../compiled/literature_review.schema.json    |  150 -
 .../compiled/novelty_assessment.schema.json   |  147 -
 .../provenance_extraction.schema.json         |  163 -
 .../compiled/provenance_search.schema.json    |  107 -
 .../compiled/provenance_synthesis.schema.json |  230 -
 .../assets/compiled/reproduction.mmd          |   29 -
 .../reproduction_synthesis.schema.json        |  253 -
 .../compiled/testability_triage.schema.json   |  144 -
 .../assets/compiled/theorizer.mmd             |   27 -
 .../compiled/theory_formation.schema.json     |  240 -
 .../compiled/theory_synthesis.schema.json     |  280 -
 .../verification_synthesis.schema.json        |  232 -
 .../assets/compiled/adjudicate.schema.json    |  144 -
 .../assets/compiled/analysis.schema.json      |  119 -
 .../assets/compiled/audit.schema.json         |  127 -
 .../assets/compiled/auto_discovery.mmd        |   18 -
 .../compiled/cohort_assembly.schema.json      |  206 -
 .../compiled/data_acquisition.schema.json     |  161 -
 ..._literature_grounded_theory_generation.mmd |   92 -
 .../data_driven_discovery.schema.json         |  152 -
 .../assets/compiled/data_provenance.mmd       |   16 -
 .../assets/compiled/discovery_run.schema.json |  170 -
 .../compiled/discovery_synthesis.schema.json  |  271 -
 .../compiled/evidence_extraction.schema.json  |  132 -
 .../compiled/evidence_gathering.schema.json   |  121 -
 .../compiled/experiment_design.schema.json    |  162 -
 .../compiled/final_synthesis.schema.json      |  289 -
 .../research-step/assets/compiled/flows.json  | 6657 -----------------
 .../assets/compiled/gap_synthesis.schema.json |  221 -
 .../compiled/holdout_replication.schema.json  |  167 -
 .../compiled/hypothesis_driven_research.mmd   |   29 -
 .../compiled/hypothesis_formation.schema.json |  126 -
 .../compiled/hypothesis_synthesis.schema.json |  224 -
 .../compiled/law_extraction.schema.json       |  139 -
 .../compiled/literature_review.schema.json    |  150 -
 .../compiled/novelty_assessment.schema.json   |  147 -
 .../provenance_extraction.schema.json         |  163 -
 .../compiled/provenance_search.schema.json    |  107 -
 .../compiled/provenance_synthesis.schema.json |  230 -
 .../assets/compiled/reproduction.mmd          |   29 -
 .../reproduction_synthesis.schema.json        |  253 -
 .../compiled/testability_triage.schema.json   |  144 -
 .../assets/compiled/theorizer.mmd             |   27 -
 .../compiled/theory_formation.schema.json     |  240 -
 .../compiled/theory_synthesis.schema.json     |  280 -
 .../verification_synthesis.schema.json        |  232 -
 102 files changed, 35235 deletions(-)
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/flows.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
 delete mode 100644 plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/audit.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/flows.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
 delete mode 100644 plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/adjudicate.schema.json
 delete mode 100644 skills/research-step/assets/compiled/analysis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/audit.schema.json
 delete mode 100644 skills/research-step/assets/compiled/auto_discovery.mmd
 delete mode 100644 skills/research-step/assets/compiled/cohort_assembly.schema.json
 delete mode 100644 skills/research-step/assets/compiled/data_acquisition.schema.json
 delete mode 100644 skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 delete mode 100644 skills/research-step/assets/compiled/data_driven_discovery.schema.json
 delete mode 100644 skills/research-step/assets/compiled/data_provenance.mmd
 delete mode 100644 skills/research-step/assets/compiled/discovery_run.schema.json
 delete mode 100644 skills/research-step/assets/compiled/discovery_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/evidence_extraction.schema.json
 delete mode 100644 skills/research-step/assets/compiled/evidence_gathering.schema.json
 delete mode 100644 skills/research-step/assets/compiled/experiment_design.schema.json
 delete mode 100644 skills/research-step/assets/compiled/final_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/flows.json
 delete mode 100644 skills/research-step/assets/compiled/gap_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/holdout_replication.schema.json
 delete mode 100644 skills/research-step/assets/compiled/hypothesis_driven_research.mmd
 delete mode 100644 skills/research-step/assets/compiled/hypothesis_formation.schema.json
 delete mode 100644 skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/law_extraction.schema.json
 delete mode 100644 skills/research-step/assets/compiled/literature_review.schema.json
 delete mode 100644 skills/research-step/assets/compiled/novelty_assessment.schema.json
 delete mode 100644 skills/research-step/assets/compiled/provenance_extraction.schema.json
 delete mode 100644 skills/research-step/assets/compiled/provenance_search.schema.json
 delete mode 100644 skills/research-step/assets/compiled/provenance_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/reproduction.mmd
 delete mode 100644 skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/testability_triage.schema.json
 delete mode 100644 skills/research-step/assets/compiled/theorizer.mmd
 delete mode 100644 skills/research-step/assets/compiled/theory_formation.schema.json
 delete mode 100644 skills/research-step/assets/compiled/theory_synthesis.schema.json
 delete mode 100644 skills/research-step/assets/compiled/verification_synthesis.schema.json

diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
deleted file mode 100644
index ccfb9d1..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/adjudicate.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/adjudicate.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "artifacts"
-  ],
-  "title": "adjudicate",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
deleted file mode 100644
index 55e557d..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/analysis.schema.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "analysis": {
-      "additionalProperties": true,
-      "properties": {
-        "assumptions": {
-          "type": "string"
-        },
-        "code": {
-          "type": "string"
-        },
-        "final_answer": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "final_answer",
-        "assumptions",
-        "code"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/analysis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "analysis": {
-      "$ref": "#/$defs/analysis"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "analysis",
-    "figures",
-    "artifacts"
-  ],
-  "title": "analysis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
deleted file mode 100644
index ca21120..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/audit.schema.json
+++ /dev/null
@@ -1,127 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "audit_report": {
-      "additionalProperties": true,
-      "properties": {
-        "artifacts_found": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "challenges": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "check": {
-                "type": "string"
-              },
-              "concern": {
-                "type": "string"
-              },
-              "outcome": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "concern",
-              "check",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "recommended_adjustment": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "verdict_survives": {
-          "type": "boolean"
-        }
-      },
-      "required": [
-        "subject_id",
-        "challenges",
-        "artifacts_found",
-        "verdict_survives",
-        "recommended_adjustment"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/audit.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "audit_report": {
-      "$ref": "#/$defs/audit_report"
-    }
-  },
-  "required": [
-    "audit_report",
-    "artifacts"
-  ],
-  "title": "audit",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
deleted file mode 100644
index 14cd992..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/auto_discovery.mmd
+++ /dev/null
@@ -1,18 +0,0 @@
-%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
-  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
-  subgraph replication["replication (at replan)"]
-    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
-  end
-  class replication replan
-  discovery_synthesis["discovery_synthesis"]
-  cohort_assembly --> discovery_run
-  discovery_run --> replication__holdout_replication
-  cohort_assembly --> replication__holdout_replication
-  discovery_run --> discovery_synthesis
-  replication --> discovery_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
deleted file mode 100644
index 4866540..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/cohort_assembly.schema.json
+++ /dev/null
@@ -1,206 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "cohort": {
-      "additionalProperties": true,
-      "properties": {
-        "discovery_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "exclusion_criteria": {
-          "type": "string"
-        },
-        "holdout_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "id": {
-          "type": "string"
-        },
-        "inclusion_criteria": {
-          "type": "string"
-        },
-        "research_question": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source_data_sources": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "research_question",
-        "inclusion_criteria",
-        "exclusion_criteria",
-        "sampling",
-        "source_data_sources",
-        "discovery_subset",
-        "holdout_subset",
-        "run_id"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/cohort_assembly.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "cohort": {
-      "$ref": "#/$defs/cohort"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "cohort",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "cohort_assembly",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
deleted file mode 100644
index 0bec23c..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/data_acquisition.schema.json
+++ /dev/null
@@ -1,161 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "acquisition": {
-      "additionalProperties": true,
-      "properties": {
-        "access_status": {
-          "enum": [
-            "acquired",
-            "open_unfetched",
-            "restricted",
-            "not_found"
-          ]
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "dataset_id": {
-          "type": "string"
-        },
-        "local_path": {
-          "type": "string"
-        },
-        "validation_note": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "access_status",
-        "local_path",
-        "dataset_id",
-        "validation_note"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_acquisition.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "acquisitions": {
-      "items": {
-        "$ref": "#/$defs/acquisition"
-      },
-      "type": "array"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "acquisitions",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_acquisition",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
deleted file mode 100644
index cb56eed..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
+++ /dev/null
@@ -1,92 +0,0 @@
-%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  subgraph data_provenance["data_provenance [flow: data_provenance]"]
-    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    data_provenance__provenance_synthesis["provenance_synthesis"]
-  end
-  class data_provenance embed
-  subgraph reproduction["reproduction [flow: reproduction]"]
-    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-    reproduction__law_extraction["law_extraction"]
-    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-    subgraph reproduction__replication["replication (at replan)"]
-      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
-      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__adjudicate["adjudicate"]
-    end
-    class reproduction__replication replan
-    reproduction__reproduction_synthesis["reproduction_synthesis"]
-  end
-  class reproduction embed
-  subgraph theorizer["theorizer [flow: theorizer]"]
-    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    subgraph theorizer__theory_generation["theory_generation"]
-      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-    end
-    theorizer__testability_triage["testability_triage"]
-    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-    theorizer__theory_synthesis["theory_synthesis"]
-  end
-  class theorizer embed
-  subgraph verification["verification (at replan)"]
-    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__adjudicate["adjudicate"]
-  end
-  class verification replan
-  verification_synthesis["verification_synthesis"]
-  gap_synthesis["gap_synthesis"]
-  final_synthesis["final_synthesis"]
-  data_provenance__provenance_search --> data_provenance__provenance_extraction
-  data_provenance__provenance_search --> data_provenance__data_acquisition
-  data_provenance__provenance_extraction --> data_provenance__data_acquisition
-  data_provenance__provenance_search --> data_provenance__provenance_synthesis
-  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
-  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
-  reproduction__data_driven_discovery --> reproduction__law_extraction
-  reproduction__law_extraction --> reproduction__evidence_gathering
-  reproduction__law_extraction --> reproduction__replication__experiment_design
-  reproduction__evidence_gathering --> reproduction__replication__experiment_design
-  reproduction__replication__experiment_design --> reproduction__replication__analysis
-  reproduction__evidence_gathering --> reproduction__replication__analysis
-  reproduction__replication__analysis --> reproduction__replication__audit
-  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
-  reproduction__replication__analysis --> reproduction__replication__adjudicate
-  reproduction__replication__audit --> reproduction__replication__adjudicate
-  reproduction__law_extraction --> reproduction__reproduction_synthesis
-  reproduction__replication --> reproduction__reproduction_synthesis
-  reproduction__law_extraction --> theorizer__evidence_extraction
-  reproduction__replication__adjudicate --> theorizer__evidence_extraction
-  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
-  theorizer__theory_generation --> theorizer__testability_triage
-  reproduction__data_driven_discovery --> theorizer__testability_triage
-  reproduction__evidence_gathering --> theorizer__testability_triage
-  theorizer__testability_triage --> theorizer__novelty_assessment
-  theorizer__theory_generation --> theorizer__theory_synthesis
-  theorizer__novelty_assessment --> theorizer__theory_synthesis
-  theorizer__testability_triage --> theorizer__theory_synthesis
-  theorizer__testability_triage --> verification__analysis
-  reproduction__data_driven_discovery --> verification__analysis
-  reproduction__evidence_gathering --> verification__analysis
-  verification__analysis --> verification__audit
-  theorizer__testability_triage --> verification__adjudicate
-  verification__analysis --> verification__adjudicate
-  verification__audit --> verification__adjudicate
-  verification --> verification_synthesis
-  theorizer__novelty_assessment --> verification_synthesis
-  data_provenance__provenance_synthesis --> gap_synthesis
-  reproduction__reproduction_synthesis --> gap_synthesis
-  theorizer__theory_synthesis --> gap_synthesis
-  verification_synthesis --> gap_synthesis
-  data_provenance__provenance_synthesis --> final_synthesis
-  reproduction__reproduction_synthesis --> final_synthesis
-  theorizer__theory_synthesis --> final_synthesis
-  verification_synthesis --> final_synthesis
-  gap_synthesis --> final_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
deleted file mode 100644
index 14f65a7..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/data_driven_discovery.schema.json
+++ /dev/null
@@ -1,152 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_driven_discovery.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_driven_discovery",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
deleted file mode 100644
index 3b46977..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/data_provenance.mmd
+++ /dev/null
@@ -1,16 +0,0 @@
-%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-  provenance_synthesis["provenance_synthesis"]
-  provenance_search --> provenance_extraction
-  provenance_search --> data_acquisition
-  provenance_extraction --> data_acquisition
-  provenance_search --> provenance_synthesis
-  provenance_extraction --> provenance_synthesis
-  data_acquisition --> provenance_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
deleted file mode 100644
index b7ac259..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_run.schema.json
+++ /dev/null
@@ -1,170 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_run.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "discovery_run",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
deleted file mode 100644
index 29cb31f..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/discovery_synthesis.schema.json
+++ /dev/null
@@ -1,271 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "discovery_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "interpretation": {
-          "type": "string"
-        },
-        "laws": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "deciding_experiment": {
-                "type": "string"
-              },
-              "effect_size_discovery": {
-                "type": "string"
-              },
-              "effect_size_holdout": {
-                "type": "string"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "surprise": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "surprise",
-              "outcome",
-              "deciding_experiment",
-              "effect_size_discovery",
-              "effect_size_holdout"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "run_id",
-        "laws",
-        "interpretation",
-        "next_steps",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "discovery_report": {
-      "$ref": "#/$defs/discovery_report"
-    }
-  },
-  "required": [
-    "discovery_report",
-    "artifacts"
-  ],
-  "title": "discovery_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
deleted file mode 100644
index 7a53a5b..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_extraction.schema.json
+++ /dev/null
@@ -1,132 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "artifacts"
-  ],
-  "title": "evidence_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
deleted file mode 100644
index c310796..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/evidence_gathering.schema.json
+++ /dev/null
@@ -1,121 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_gathering.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "datasets",
-    "artifacts"
-  ],
-  "title": "evidence_gathering",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
deleted file mode 100644
index 458fe42..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/experiment_design.schema.json
+++ /dev/null
@@ -1,162 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "experiment_design": {
-      "additionalProperties": true,
-      "properties": {
-        "construct_equivalence": {
-          "enum": [
-            "equivalent",
-            "proxy",
-            "mismatch"
-          ]
-        },
-        "data_gap": {
-          "type": "string"
-        },
-        "experiment_design_query": {
-          "type": "string"
-        },
-        "experiment_name": {
-          "type": "string"
-        },
-        "feasibility": {
-          "enum": [
-            "feasible",
-            "proxy_only",
-            "data_unavailable",
-            "construct_mismatch"
-          ]
-        },
-        "independent_operationalization": {
-          "type": "string"
-        },
-        "plain_language_description": {
-          "type": "string"
-        },
-        "prespecified": {
-          "additionalProperties": true,
-          "properties": {
-            "metric": {
-              "type": "string"
-            },
-            "success_threshold": {
-              "type": "string"
-            },
-            "test": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "test",
-            "metric",
-            "success_threshold"
-          ],
-          "type": "object"
-        },
-        "required_data": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "experiment_name",
-        "plain_language_description",
-        "source_operationalization",
-        "independent_operationalization",
-        "construct_equivalence",
-        "feasibility",
-        "required_data",
-        "data_gap",
-        "experiment_design_query",
-        "prespecified"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/experiment_design.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "experiment_design": {
-      "$ref": "#/$defs/experiment_design"
-    }
-  },
-  "required": [
-    "experiment_design",
-    "artifacts"
-  ],
-  "title": "experiment_design",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
deleted file mode 100644
index b00f085..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/final_synthesis.schema.json
+++ /dev/null
@@ -1,289 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "research_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "inference_chain": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "chain": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "claim": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "claim",
-              "chain"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sub_reports": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "report_path",
-              "one_line"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "tensions_and_surprises": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "evidence": {
-                "type": "string"
-              },
-              "observation": {
-                "type": "string"
-              },
-              "where": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "observation",
-              "where",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_highlights": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "claim": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_was_done": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theory_highlights",
-        "inference_chain",
-        "what_was_done",
-        "sub_reports",
-        "tensions_and_surprises",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/final_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "research_report": {
-      "$ref": "#/$defs/research_report"
-    }
-  },
-  "required": [
-    "research_report",
-    "artifacts"
-  ],
-  "title": "final_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/flows.json b/plugins/asta-preview/skills/research-step/assets/compiled/flows.json
deleted file mode 100644
index 907a432..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/flows.json
+++ /dev/null
@@ -1,6657 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "flows": {
-    "auto_discovery": {
-      "edges": [
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "discovery_run"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "discovery_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "discovery_synthesis"
-        }
-      ],
-      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta documents",
-            "asta generate-theories find-and-extract",
-            "asta autodiscovery create",
-            "asta autodiscovery upload",
-            "asta autodiscovery metadata"
-          ],
-          "id": "cohort_assembly",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
-          "name": "cohort_assembly",
-          "parent": null,
-          "replan": false,
-          "task": "cohort_assembly"
-        },
-        {
-          "chain": [
-            "asta autodiscovery submit",
-            "asta autodiscovery experiments"
-          ],
-          "id": "discovery_run",
-          "inputs": [
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
-          "name": "discovery_run",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_run"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__holdout_replication",
-          "inputs": [
-            "discovery_run",
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "holdout_replication",
-          "parent": "replication",
-          "replan": false,
-          "task": "holdout_replication"
-        },
-        {
-          "chain": [],
-          "id": "discovery_synthesis",
-          "inputs": [
-            "discovery_run",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
-          "name": "discovery_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_synthesis"
-        }
-      ]
-    },
-    "data_and_literature_grounded_theory_generation": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_provenance__data_acquisition",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "reproduction__law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "reproduction__replication__audit",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "reproduction__replication",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "adjudicate",
-          "source": "reproduction__replication__adjudicate",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "theorizer__evidence_extraction",
-          "target": "theorizer__theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__audit"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "verification__audit",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "verification",
-          "source": "verification",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "gap_synthesis",
-          "source": "gap_synthesis",
-          "target": "final_synthesis"
-        }
-      ],
-      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
-      "nodes": [
-        {
-          "id": "data_provenance",
-          "kind": "embed",
-          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
-          "name": "data_provenance",
-          "parent": null,
-          "replan": false,
-          "workflow": "data_provenance"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "data_provenance__provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "data_provenance__provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_provenance__data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "data_provenance__provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_synthesis"
-        },
-        {
-          "id": "reproduction",
-          "kind": "embed",
-          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
-          "name": "reproduction",
-          "parent": null,
-          "replan": false,
-          "workflow": "reproduction"
-        },
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "reproduction__data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "reproduction__evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "reproduction__replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": "reproduction",
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "reproduction__replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "reproduction_synthesis"
-        },
-        {
-          "id": "theorizer",
-          "kind": "embed",
-          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
-          "name": "theorizer",
-          "parent": null,
-          "replan": false,
-          "workflow": "theorizer"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "theorizer__evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theorizer__theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": "theorizer",
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theorizer__theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theorizer__theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "theorizer__novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "verification",
-          "kind": "group",
-          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
-          "name": "verification",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__analysis",
-          "inputs": [
-            "testability_triage",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "verification",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "verification",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "verification__adjudicate",
-          "inputs": [
-            "testability_triage",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
-          "name": "adjudicate",
-          "parent": "verification",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "verification_synthesis",
-          "inputs": [
-            "verification",
-            "novelty_assessment"
-          ],
-          "kind": "step",
-          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
-          "name": "verification_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "verification_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "gap_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
-          "name": "gap_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "gap_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "final_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis",
-            "gap_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
-          "name": "final_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "final_synthesis"
-        }
-      ]
-    },
-    "data_provenance": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_acquisition",
-          "target": "provenance_synthesis"
-        }
-      ],
-      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": null,
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_synthesis"
-        }
-      ]
-    },
-    "hypothesis_driven_research": {
-      "edges": [
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "hypothesis_formation"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "testing__data_acquisition",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "testing__audit",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "hypothesis_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testing",
-          "source": "testing",
-          "target": "hypothesis_synthesis"
-        }
-      ],
-      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "literature_review",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
-          "name": "literature_review",
-          "parent": null,
-          "replan": false,
-          "task": "literature_review"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "hypothesis_formation",
-          "inputs": [
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
-          "name": "hypothesis_formation",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_formation"
-        },
-        {
-          "id": "testing",
-          "kind": "group",
-          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
-          "name": "testing",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "testing__experiment_design",
-          "inputs": [
-            "hypothesis_formation",
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "testing",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "testing__data_acquisition",
-          "inputs": [
-            "experiment_design"
-          ],
-          "kind": "step",
-          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
-          "name": "data_acquisition",
-          "parent": "testing",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__analysis",
-          "inputs": [
-            "experiment_design",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "testing",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "testing",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "testing__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
-          "name": "adjudicate",
-          "parent": "testing",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "hypothesis_synthesis",
-          "inputs": [
-            "hypothesis_formation",
-            "testing"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
-          "name": "hypothesis_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_synthesis"
-        }
-      ]
-    },
-    "reproduction": {
-      "edges": [
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "data_driven_discovery",
-          "target": "law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "replication__audit",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "reproduction_synthesis"
-        }
-      ],
-      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.",
-      "nodes": [
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "reproduction_synthesis"
-        }
-      ]
-    },
-    "theorizer": {
-      "edges": [
-        {
-          "external": true,
-          "input": "law_extraction",
-          "source": "ext__law_extraction",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": true,
-          "input": "adjudicate",
-          "source": "ext__adjudicate",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "evidence_extraction",
-          "target": "theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "data_driven_discovery",
-          "source": "ext__data_driven_discovery",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "evidence_gathering",
-          "source": "ext__evidence_gathering",
-          "target": "testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "novelty_assessment",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "theory_synthesis"
-        }
-      ],
-      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.",
-      "nodes": [
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": null,
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": null,
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": null,
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "ext__adjudicate",
-          "kind": "external",
-          "mission": "",
-          "name": "adjudicate",
-          "parent": null,
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "id": "ext__data_driven_discovery",
-          "kind": "external",
-          "mission": "",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "id": "ext__evidence_gathering",
-          "kind": "external",
-          "mission": "",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "ext__law_extraction",
-          "kind": "external",
-          "mission": "",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        }
-      ]
-    }
-  },
-  "format_version": 1,
-  "schema_version": 2,
-  "tasks": {
-    "adjudicate": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/adjudicate.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "artifacts"
-        ],
-        "title": "adjudicate",
-        "type": "object"
-      }
-    },
-    "analysis": {
-      "output": {
-        "analysis": "analysis",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "analysis": {
-            "additionalProperties": true,
-            "properties": {
-              "assumptions": {
-                "type": "string"
-              },
-              "code": {
-                "type": "string"
-              },
-              "final_answer": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "final_answer",
-              "assumptions",
-              "code"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/analysis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "analysis": {
-            "$ref": "#/$defs/analysis"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "analysis",
-          "figures",
-          "artifacts"
-        ],
-        "title": "analysis",
-        "type": "object"
-      }
-    },
-    "audit": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "audit_report": "audit_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "audit_report": {
-            "additionalProperties": true,
-            "properties": {
-              "artifacts_found": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "challenges": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "check": {
-                      "type": "string"
-                    },
-                    "concern": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "concern",
-                    "check",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "recommended_adjustment": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "verdict_survives": {
-                "type": "boolean"
-              }
-            },
-            "required": [
-              "subject_id",
-              "challenges",
-              "artifacts_found",
-              "verdict_survives",
-              "recommended_adjustment"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/audit.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "audit_report": {
-            "$ref": "#/$defs/audit_report"
-          }
-        },
-        "required": [
-          "audit_report",
-          "artifacts"
-        ],
-        "title": "audit",
-        "type": "object"
-      }
-    },
-    "cohort_assembly": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "cohort": "cohort",
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "cohort": {
-            "additionalProperties": true,
-            "properties": {
-              "discovery_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "exclusion_criteria": {
-                "type": "string"
-              },
-              "holdout_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "id": {
-                "type": "string"
-              },
-              "inclusion_criteria": {
-                "type": "string"
-              },
-              "research_question": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source_data_sources": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "research_question",
-              "inclusion_criteria",
-              "exclusion_criteria",
-              "sampling",
-              "source_data_sources",
-              "discovery_subset",
-              "holdout_subset",
-              "run_id"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/cohort_assembly.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "cohort": {
-            "$ref": "#/$defs/cohort"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "cohort",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "cohort_assembly",
-        "type": "object"
-      }
-    },
-    "data_acquisition": {
-      "output": {
-        "acquisitions": [
-          "acquisition"
-        ],
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "acquisition": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "validation_note": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "access_status",
-              "local_path",
-              "dataset_id",
-              "validation_note"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_acquisition.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "acquisitions": {
-            "items": {
-              "$ref": "#/$defs/acquisition"
-            },
-            "type": "array"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "acquisitions",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_acquisition",
-        "type": "object"
-      }
-    },
-    "data_driven_discovery": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_driven_discovery.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_driven_discovery",
-        "type": "object"
-      }
-    },
-    "discovery_run": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_run.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "discovery_run",
-        "type": "object"
-      }
-    },
-    "discovery_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "discovery_report": "discovery_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "discovery_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "interpretation": {
-                "type": "string"
-              },
-              "laws": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "deciding_experiment": {
-                      "type": "string"
-                    },
-                    "effect_size_discovery": {
-                      "type": "string"
-                    },
-                    "effect_size_holdout": {
-                      "type": "string"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "surprise": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "surprise",
-                    "outcome",
-                    "deciding_experiment",
-                    "effect_size_discovery",
-                    "effect_size_holdout"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "run_id",
-              "laws",
-              "interpretation",
-              "next_steps",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "discovery_report": {
-            "$ref": "#/$defs/discovery_report"
-          }
-        },
-        "required": [
-          "discovery_report",
-          "artifacts"
-        ],
-        "title": "discovery_synthesis",
-        "type": "object"
-      }
-    },
-    "evidence_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "artifacts"
-        ],
-        "title": "evidence_extraction",
-        "type": "object"
-      }
-    },
-    "evidence_gathering": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_gathering.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "datasets",
-          "artifacts"
-        ],
-        "title": "evidence_gathering",
-        "type": "object"
-      }
-    },
-    "experiment_design": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "experiment_design": "experiment_design"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "experiment_design": {
-            "additionalProperties": true,
-            "properties": {
-              "construct_equivalence": {
-                "enum": [
-                  "equivalent",
-                  "proxy",
-                  "mismatch"
-                ]
-              },
-              "data_gap": {
-                "type": "string"
-              },
-              "experiment_design_query": {
-                "type": "string"
-              },
-              "experiment_name": {
-                "type": "string"
-              },
-              "feasibility": {
-                "enum": [
-                  "feasible",
-                  "proxy_only",
-                  "data_unavailable",
-                  "construct_mismatch"
-                ]
-              },
-              "independent_operationalization": {
-                "type": "string"
-              },
-              "plain_language_description": {
-                "type": "string"
-              },
-              "prespecified": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "experiment_name",
-              "plain_language_description",
-              "source_operationalization",
-              "independent_operationalization",
-              "construct_equivalence",
-              "feasibility",
-              "required_data",
-              "data_gap",
-              "experiment_design_query",
-              "prespecified"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/experiment_design.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "experiment_design": {
-            "$ref": "#/$defs/experiment_design"
-          }
-        },
-        "required": [
-          "experiment_design",
-          "artifacts"
-        ],
-        "title": "experiment_design",
-        "type": "object"
-      }
-    },
-    "final_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "research_report": "research_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "research_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "inference_chain": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "chain": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "claim": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "claim",
-                    "chain"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sub_reports": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "kind": {
-                      "type": "string"
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "report_path": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "kind",
-                    "report_path",
-                    "one_line"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "tensions_and_surprises": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "observation": {
-                      "type": "string"
-                    },
-                    "where": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "observation",
-                    "where",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_highlights": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "claim": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_was_done": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theory_highlights",
-              "inference_chain",
-              "what_was_done",
-              "sub_reports",
-              "tensions_and_surprises",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/final_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "research_report": {
-            "$ref": "#/$defs/research_report"
-          }
-        },
-        "required": [
-          "research_report",
-          "artifacts"
-        ],
-        "title": "final_synthesis",
-        "type": "object"
-      }
-    },
-    "gap_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_gaps_report": "data_gaps_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_gaps_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "arose_in": {
-                      "type": "string"
-                    },
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity",
-                    "arose_in"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "gaps",
-              "next_steps",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/gap_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_gaps_report": {
-            "$ref": "#/$defs/data_gaps_report"
-          }
-        },
-        "required": [
-          "data_gaps_report",
-          "artifacts"
-        ],
-        "title": "gap_synthesis",
-        "type": "object"
-      }
-    },
-    "holdout_replication": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/holdout_replication.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "figures",
-          "artifacts"
-        ],
-        "title": "holdout_replication",
-        "type": "object"
-      }
-    },
-    "hypothesis_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypotheses": [
-          "hypothesis"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "hypothesis": {
-            "additionalProperties": true,
-            "properties": {
-              "falsifiable_prediction": {
-                "type": "string"
-              },
-              "grounds": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "rationale": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "rationale",
-              "falsifiable_prediction",
-              "grounds"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypotheses": {
-            "items": {
-              "$ref": "#/$defs/hypothesis"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "hypotheses",
-          "artifacts"
-        ],
-        "title": "hypothesis_formation",
-        "type": "object"
-      }
-    },
-    "hypothesis_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypothesis_report": "hypothesis_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "hypothesis_report": {
-            "additionalProperties": true,
-            "properties": {
-              "answer": {
-                "type": "string"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "hypothesis_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "hypothesis_id",
-                    "statement",
-                    "outcome",
-                    "effect_size_observed",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_questions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "question": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "question",
-              "ledger",
-              "answer",
-              "open_questions",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypothesis_report": {
-            "$ref": "#/$defs/hypothesis_report"
-          }
-        },
-        "required": [
-          "hypothesis_report",
-          "artifacts"
-        ],
-        "title": "hypothesis_synthesis",
-        "type": "object"
-      }
-    },
-    "law_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/law_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "law_extraction",
-        "type": "object"
-      }
-    },
-    "literature_review": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "literature_review": "literature_review"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "literature_review": {
-            "additionalProperties": true,
-            "properties": {
-              "citations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "corpus_id": {
-                      "type": "number"
-                    },
-                    "id": {
-                      "type": "string"
-                    },
-                    "relevance": {
-                      "type": "string"
-                    },
-                    "title": {
-                      "type": "string"
-                    },
-                    "url": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "id",
-                    "corpus_id",
-                    "title",
-                    "url",
-                    "relevance"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "key_findings": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_gaps": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "summary": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "summary",
-              "key_findings",
-              "open_gaps",
-              "citations"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/literature_review.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "literature_review": {
-            "$ref": "#/$defs/literature_review"
-          }
-        },
-        "required": [
-          "literature_review",
-          "artifacts"
-        ],
-        "title": "literature_review",
-        "type": "object"
-      }
-    },
-    "novelty_assessment": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_evaluations": [
-          "theory_evaluation"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_evaluation": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "overall_support": {
-                "enum": [
-                  "supports",
-                  "mixed",
-                  "contradicts",
-                  "inconclusive"
-                ]
-              },
-              "overall_support_raw": {
-                "type": "string"
-              },
-              "statement_evaluations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "explanation": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "statement_index": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "statement_index",
-                    "novelty",
-                    "explanation"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "theory_id",
-              "novelty",
-              "overall_support",
-              "explanation",
-              "statement_evaluations"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/novelty_assessment.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_evaluations": {
-            "items": {
-              "$ref": "#/$defs/theory_evaluation"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theory_evaluations",
-          "artifacts"
-        ],
-        "title": "novelty_assessment",
-        "type": "object"
-      }
-    },
-    "provenance_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data",
-        "source_access": [
-          "source_access"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "source_access": {
-            "additionalProperties": true,
-            "properties": {
-              "data_availability": {
-                "type": "string"
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "identifier": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "data_availability",
-              "repository",
-              "identifier"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          },
-          "source_access": {
-            "items": {
-              "$ref": "#/$defs/source_access"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "source_access",
-          "artifacts"
-        ],
-        "title": "provenance_extraction",
-        "type": "object"
-      }
-    },
-    "provenance_search": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_sources": [
-          "data_source"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_source": {
-            "additionalProperties": true,
-            "properties": {
-              "dataset_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "dataset_id",
-              "paper_id",
-              "paper_title",
-              "paper_url"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_search.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_sources": {
-            "items": {
-              "$ref": "#/$defs/data_source"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "data_sources",
-          "artifacts"
-        ],
-        "title": "provenance_search",
-        "type": "object"
-      }
-    },
-    "provenance_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "provenance_report": "provenance_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "provenance_report": {
-            "additionalProperties": true,
-            "properties": {
-              "acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "not_acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sources": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "access_status": {
-                      "enum": [
-                        "acquired",
-                        "open_unfetched",
-                        "restricted",
-                        "not_found"
-                      ]
-                    },
-                    "dataset_id": {
-                      "type": "string"
-                    },
-                    "local_path": {
-                      "type": "string"
-                    },
-                    "paper_title": {
-                      "type": "string"
-                    },
-                    "paper_url": {
-                      "type": "string"
-                    },
-                    "repository": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "dataset_id",
-                    "paper_title",
-                    "paper_url",
-                    "repository",
-                    "access_status",
-                    "local_path"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "sources",
-              "method_note",
-              "acquired",
-              "not_acquired",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "provenance_report": {
-            "$ref": "#/$defs/provenance_report"
-          }
-        },
-        "required": [
-          "provenance_report",
-          "artifacts"
-        ],
-        "title": "provenance_synthesis",
-        "type": "object"
-      }
-    },
-    "reproduction_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "reproduction_report": "reproduction_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "reproduction_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "laws_ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "effect_size_source": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "independence_axes": {
-                      "items": {
-                        "enum": [
-                          "region",
-                          "instrument",
-                          "method",
-                          "construct",
-                          "temporal",
-                          "population"
-                        ]
-                      },
-                      "type": "array"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "testability": {
-                      "enum": [
-                        "tested",
-                        "proxy_only",
-                        "untestable"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "outcome",
-                    "testability",
-                    "effect_size_source",
-                    "effect_size_observed",
-                    "independence_axes",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_failed_or_untestable": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_held": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "method_note",
-              "laws_ledger",
-              "what_held",
-              "what_failed_or_untestable",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/reproduction_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "reproduction_report": {
-            "$ref": "#/$defs/reproduction_report"
-          }
-        },
-        "required": [
-          "reproduction_report",
-          "artifacts"
-        ],
-        "title": "reproduction_synthesis",
-        "type": "object"
-      }
-    },
-    "testability_triage": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "testability_triage": "testability_triage"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "testability_triage": {
-            "additionalProperties": true,
-            "properties": {
-              "assessments": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "available_data": {
-                      "type": "string"
-                    },
-                    "gap": {
-                      "type": "string"
-                    },
-                    "proposed_test": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "metric": {
-                          "type": "string"
-                        },
-                        "success_threshold": {
-                          "type": "string"
-                        },
-                        "test": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "test",
-                        "metric",
-                        "success_threshold"
-                      ],
-                      "type": "object"
-                    },
-                    "required_data": {
-                      "type": "string"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "testable_now",
-                    "available_data",
-                    "required_data",
-                    "proposed_test",
-                    "gap"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "testable_theory_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "assessments",
-              "testable_theory_ids"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/testability_triage.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "testability_triage": {
-            "$ref": "#/$defs/testability_triage"
-          }
-        },
-        "required": [
-          "testability_triage",
-          "artifacts"
-        ],
-        "title": "testability_triage",
-        "type": "object"
-      }
-    },
-    "theory_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theories": [
-          "theory"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory": {
-            "additionalProperties": true,
-            "properties": {
-              "components": {
-                "additionalProperties": true,
-                "properties": {
-                  "generation_objective": {
-                    "type": "string"
-                  },
-                  "new_predictions_likely": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "new_predictions_unknown": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statements": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "conflicting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "statement_name": {
-                          "type": "string"
-                        },
-                        "supporting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "theory_statement": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "statement_name",
-                        "theory_statement",
-                        "supporting_evidence",
-                        "conflicting_evidence"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "unaccounted_for": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "generation_objective",
-                  "theory_statements",
-                  "new_predictions_likely",
-                  "new_predictions_unknown",
-                  "unaccounted_for"
-                ],
-                "type": "object"
-              },
-              "description": {
-                "type": "string"
-              },
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "name": {
-                "type": "string"
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "theory_query": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "name",
-              "description",
-              "theory_query",
-              "objective",
-              "grounds_law_ids",
-              "supporting_evidence_ids",
-              "components"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theories": {
-            "items": {
-              "$ref": "#/$defs/theory"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theories",
-          "artifacts"
-        ],
-        "title": "theory_formation",
-        "type": "object"
-      }
-    },
-    "theory_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_report": "theory_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "new_predictions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "novelty_summary": {
-                "type": "string"
-              },
-              "open_threads": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "theories": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "grounds_law_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "name": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "objective": {
-                      "enum": [
-                        "accuracy_focused",
-                        "novelty_focused"
-                      ]
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "supporting_evidence_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "name",
-                    "objective",
-                    "one_line",
-                    "grounds_law_ids",
-                    "novelty",
-                    "testable_now",
-                    "supporting_evidence_ids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theories",
-              "novelty_summary",
-              "new_predictions",
-              "open_threads",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_report": {
-            "$ref": "#/$defs/theory_report"
-          }
-        },
-        "required": [
-          "theory_report",
-          "artifacts"
-        ],
-        "title": "theory_synthesis",
-        "type": "object"
-      }
-    },
-    "verification_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "verification_report": "verification_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "verification_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "novelty_by_verification": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "audit_survived": {
-                      "type": "boolean"
-                    },
-                    "claim": {
-                      "type": "string"
-                    },
-                    "data_used": {
-                      "type": "string"
-                    },
-                    "effect_size": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome",
-                    "effect_size",
-                    "data_used",
-                    "audit_survived"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_could_not_be_tested": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_was_tested": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "novelty_by_verification",
-              "what_was_tested",
-              "what_could_not_be_tested",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/verification_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "verification_report": {
-            "$ref": "#/$defs/verification_report"
-          }
-        },
-        "required": [
-          "verification_report",
-          "artifacts"
-        ],
-        "title": "verification_synthesis",
-        "type": "object"
-      }
-    }
-  }
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
deleted file mode 100644
index 760fbb5..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/gap_synthesis.schema.json
+++ /dev/null
@@ -1,221 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_gaps_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "arose_in": {
-                "type": "string"
-              },
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity",
-              "arose_in"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "gaps",
-        "next_steps",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/gap_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_gaps_report": {
-      "$ref": "#/$defs/data_gaps_report"
-    }
-  },
-  "required": [
-    "data_gaps_report",
-    "artifacts"
-  ],
-  "title": "gap_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
deleted file mode 100644
index 9d18252..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/holdout_replication.schema.json
+++ /dev/null
@@ -1,167 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/holdout_replication.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "figures",
-    "artifacts"
-  ],
-  "title": "holdout_replication",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
deleted file mode 100644
index e996ef7..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  literature_review["literature_review<br/>asta literature find · asta papers search"]
-  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph testing["testing (at replan)"]
-    testing__experiment_design["experiment_design<br/>asta experiment"]
-    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__adjudicate["adjudicate"]
-  end
-  class testing replan
-  hypothesis_synthesis["hypothesis_synthesis"]
-  literature_review --> hypothesis_formation
-  hypothesis_formation --> testing__experiment_design
-  literature_review --> testing__experiment_design
-  testing__experiment_design --> testing__data_acquisition
-  testing__experiment_design --> testing__analysis
-  testing__data_acquisition --> testing__analysis
-  testing__analysis --> testing__audit
-  testing__experiment_design --> testing__adjudicate
-  testing__analysis --> testing__adjudicate
-  testing__audit --> testing__adjudicate
-  hypothesis_formation --> hypothesis_synthesis
-  testing --> hypothesis_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
deleted file mode 100644
index 694d94f..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_formation.schema.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "hypothesis": {
-      "additionalProperties": true,
-      "properties": {
-        "falsifiable_prediction": {
-          "type": "string"
-        },
-        "grounds": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "rationale": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "rationale",
-        "falsifiable_prediction",
-        "grounds"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypotheses": {
-      "items": {
-        "$ref": "#/$defs/hypothesis"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "hypotheses",
-    "artifacts"
-  ],
-  "title": "hypothesis_formation",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
deleted file mode 100644
index b2fe767..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
+++ /dev/null
@@ -1,224 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "hypothesis_report": {
-      "additionalProperties": true,
-      "properties": {
-        "answer": {
-          "type": "string"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "hypothesis_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "hypothesis_id",
-              "statement",
-              "outcome",
-              "effect_size_observed",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_questions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "question": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "question",
-        "ledger",
-        "answer",
-        "open_questions",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypothesis_report": {
-      "$ref": "#/$defs/hypothesis_report"
-    }
-  },
-  "required": [
-    "hypothesis_report",
-    "artifacts"
-  ],
-  "title": "hypothesis_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
deleted file mode 100644
index 7b3e1fc..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/law_extraction.schema.json
+++ /dev/null
@@ -1,139 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/law_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "law_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
deleted file mode 100644
index 14df7b7..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/literature_review.schema.json
+++ /dev/null
@@ -1,150 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "literature_review": {
-      "additionalProperties": true,
-      "properties": {
-        "citations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "corpus_id": {
-                "type": "number"
-              },
-              "id": {
-                "type": "string"
-              },
-              "relevance": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "corpus_id",
-              "title",
-              "url",
-              "relevance"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "key_findings": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_gaps": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "summary": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "summary",
-        "key_findings",
-        "open_gaps",
-        "citations"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/literature_review.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "literature_review": {
-      "$ref": "#/$defs/literature_review"
-    }
-  },
-  "required": [
-    "literature_review",
-    "artifacts"
-  ],
-  "title": "literature_review",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
deleted file mode 100644
index 729f9fe..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/novelty_assessment.schema.json
+++ /dev/null
@@ -1,147 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_evaluation": {
-      "additionalProperties": true,
-      "properties": {
-        "explanation": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "novelty": {
-          "enum": [
-            "established",
-            "derivable",
-            "genuinely_new"
-          ]
-        },
-        "overall_support": {
-          "enum": [
-            "supports",
-            "mixed",
-            "contradicts",
-            "inconclusive"
-          ]
-        },
-        "overall_support_raw": {
-          "type": "string"
-        },
-        "statement_evaluations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "statement_index": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "statement_index",
-              "novelty",
-              "explanation"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "theory_id",
-        "novelty",
-        "overall_support",
-        "explanation",
-        "statement_evaluations"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/novelty_assessment.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_evaluations": {
-      "items": {
-        "$ref": "#/$defs/theory_evaluation"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theory_evaluations",
-    "artifacts"
-  ],
-  "title": "novelty_assessment",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
deleted file mode 100644
index 2bd4ea8..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_extraction.schema.json
+++ /dev/null
@@ -1,163 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "source_access": {
-      "additionalProperties": true,
-      "properties": {
-        "data_availability": {
-          "type": "string"
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "identifier": {
-          "type": "string"
-        },
-        "repository": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "data_availability",
-        "repository",
-        "identifier"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    },
-    "source_access": {
-      "items": {
-        "$ref": "#/$defs/source_access"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "source_access",
-    "artifacts"
-  ],
-  "title": "provenance_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
deleted file mode 100644
index 8a924d9..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_search.schema.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_source": {
-      "additionalProperties": true,
-      "properties": {
-        "dataset_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "paper_title": {
-          "type": "string"
-        },
-        "paper_url": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "dataset_id",
-        "paper_id",
-        "paper_title",
-        "paper_url"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_search.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_sources": {
-      "items": {
-        "$ref": "#/$defs/data_source"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "data_sources",
-    "artifacts"
-  ],
-  "title": "provenance_search",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
deleted file mode 100644
index 0d43a6f..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/provenance_synthesis.schema.json
+++ /dev/null
@@ -1,230 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "provenance_report": {
-      "additionalProperties": true,
-      "properties": {
-        "acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "not_acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sources": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "dataset_id",
-              "paper_title",
-              "paper_url",
-              "repository",
-              "access_status",
-              "local_path"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "sources",
-        "method_note",
-        "acquired",
-        "not_acquired",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "provenance_report": {
-      "$ref": "#/$defs/provenance_report"
-    }
-  },
-  "required": [
-    "provenance_report",
-    "artifacts"
-  ],
-  "title": "provenance_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
deleted file mode 100644
index 4bb9e6e..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-  law_extraction["law_extraction"]
-  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-  subgraph replication["replication (at replan)"]
-    replication__experiment_design["experiment_design<br/>asta experiment"]
-    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__adjudicate["adjudicate"]
-  end
-  class replication replan
-  reproduction_synthesis["reproduction_synthesis"]
-  data_driven_discovery --> law_extraction
-  law_extraction --> evidence_gathering
-  law_extraction --> replication__experiment_design
-  evidence_gathering --> replication__experiment_design
-  replication__experiment_design --> replication__analysis
-  evidence_gathering --> replication__analysis
-  replication__analysis --> replication__audit
-  replication__experiment_design --> replication__adjudicate
-  replication__analysis --> replication__adjudicate
-  replication__audit --> replication__adjudicate
-  law_extraction --> reproduction_synthesis
-  replication --> reproduction_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
deleted file mode 100644
index 570e076..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
+++ /dev/null
@@ -1,253 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "reproduction_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "laws_ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "outcome",
-              "testability",
-              "effect_size_source",
-              "effect_size_observed",
-              "independence_axes",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_failed_or_untestable": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_held": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "method_note",
-        "laws_ledger",
-        "what_held",
-        "what_failed_or_untestable",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/reproduction_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "reproduction_report": {
-      "$ref": "#/$defs/reproduction_report"
-    }
-  },
-  "required": [
-    "reproduction_report",
-    "artifacts"
-  ],
-  "title": "reproduction_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
deleted file mode 100644
index 8968920..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/testability_triage.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "testability_triage": {
-      "additionalProperties": true,
-      "properties": {
-        "assessments": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "available_data": {
-                "type": "string"
-              },
-              "gap": {
-                "type": "string"
-              },
-              "proposed_test": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "testable_now",
-              "available_data",
-              "required_data",
-              "proposed_test",
-              "gap"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "testable_theory_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "assessments",
-        "testable_theory_ids"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/testability_triage.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "testability_triage": {
-      "$ref": "#/$defs/testability_triage"
-    }
-  },
-  "required": [
-    "testability_triage",
-    "artifacts"
-  ],
-  "title": "testability_triage",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd b/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
deleted file mode 100644
index 59e2d0f..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/theorizer.mmd
+++ /dev/null
@@ -1,27 +0,0 @@
-%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph theory_generation["theory_generation"]
-    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-  end
-  testability_triage["testability_triage"]
-  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-  theory_synthesis["theory_synthesis"]
-  ext__adjudicate(["adjudicate (external)"]):::external
-  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
-  ext__evidence_gathering(["evidence_gathering (external)"]):::external
-  ext__law_extraction(["law_extraction (external)"]):::external
-  ext__law_extraction -.-> evidence_extraction
-  ext__adjudicate -.-> evidence_extraction
-  evidence_extraction --> theory_generation__theory_formation
-  theory_generation --> testability_triage
-  ext__data_driven_discovery -.-> testability_triage
-  ext__evidence_gathering -.-> testability_triage
-  testability_triage --> novelty_assessment
-  theory_generation --> theory_synthesis
-  novelty_assessment --> theory_synthesis
-  testability_triage --> theory_synthesis
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
deleted file mode 100644
index 7373cec..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/theory_formation.schema.json
+++ /dev/null
@@ -1,240 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory": {
-      "additionalProperties": true,
-      "properties": {
-        "components": {
-          "additionalProperties": true,
-          "properties": {
-            "generation_objective": {
-              "type": "string"
-            },
-            "new_predictions_likely": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "new_predictions_unknown": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "theory_statements": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "statement_name": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statement": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "statement_name",
-                  "theory_statement",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            },
-            "unaccounted_for": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "text": {
-                    "type": "string"
-                  },
-                  "uuids": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "text",
-                  "uuids"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "generation_objective",
-            "theory_statements",
-            "new_predictions_likely",
-            "new_predictions_unknown",
-            "unaccounted_for"
-          ],
-          "type": "object"
-        },
-        "description": {
-          "type": "string"
-        },
-        "grounds_law_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        },
-        "objective": {
-          "enum": [
-            "accuracy_focused",
-            "novelty_focused"
-          ]
-        },
-        "supporting_evidence_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "theory_query": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "name",
-        "description",
-        "theory_query",
-        "objective",
-        "grounds_law_ids",
-        "supporting_evidence_ids",
-        "components"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theories": {
-      "items": {
-        "$ref": "#/$defs/theory"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theories",
-    "artifacts"
-  ],
-  "title": "theory_formation",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
deleted file mode 100644
index dd2768e..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/theory_synthesis.schema.json
+++ /dev/null
@@ -1,280 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "new_predictions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "novelty_summary": {
-          "type": "string"
-        },
-        "open_threads": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "theories": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "name": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "name",
-              "objective",
-              "one_line",
-              "grounds_law_ids",
-              "novelty",
-              "testable_now",
-              "supporting_evidence_ids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theories",
-        "novelty_summary",
-        "new_predictions",
-        "open_threads",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_report": {
-      "$ref": "#/$defs/theory_report"
-    }
-  },
-  "required": [
-    "theory_report",
-    "artifacts"
-  ],
-  "title": "theory_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json b/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
deleted file mode 100644
index 8d1a639..0000000
--- a/plugins/asta-preview/skills/research-step/assets/compiled/verification_synthesis.schema.json
+++ /dev/null
@@ -1,232 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "verification_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "novelty_by_verification": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "audit_survived": {
-                "type": "boolean"
-              },
-              "claim": {
-                "type": "string"
-              },
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome",
-              "effect_size",
-              "data_used",
-              "audit_survived"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_could_not_be_tested": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_was_tested": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "novelty_by_verification",
-        "what_was_tested",
-        "what_could_not_be_tested",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/verification_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "verification_report": {
-      "$ref": "#/$defs/verification_report"
-    }
-  },
-  "required": [
-    "verification_report",
-    "artifacts"
-  ],
-  "title": "verification_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json b/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
deleted file mode 100644
index ccfb9d1..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/adjudicate.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/adjudicate.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "artifacts"
-  ],
-  "title": "adjudicate",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json b/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
deleted file mode 100644
index 55e557d..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/analysis.schema.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "analysis": {
-      "additionalProperties": true,
-      "properties": {
-        "assumptions": {
-          "type": "string"
-        },
-        "code": {
-          "type": "string"
-        },
-        "final_answer": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "final_answer",
-        "assumptions",
-        "code"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/analysis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "analysis": {
-      "$ref": "#/$defs/analysis"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "analysis",
-    "figures",
-    "artifacts"
-  ],
-  "title": "analysis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/audit.schema.json b/plugins/asta/skills/research-step/assets/compiled/audit.schema.json
deleted file mode 100644
index ca21120..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/audit.schema.json
+++ /dev/null
@@ -1,127 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "audit_report": {
-      "additionalProperties": true,
-      "properties": {
-        "artifacts_found": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "challenges": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "check": {
-                "type": "string"
-              },
-              "concern": {
-                "type": "string"
-              },
-              "outcome": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "concern",
-              "check",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "recommended_adjustment": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "verdict_survives": {
-          "type": "boolean"
-        }
-      },
-      "required": [
-        "subject_id",
-        "challenges",
-        "artifacts_found",
-        "verdict_survives",
-        "recommended_adjustment"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/audit.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "audit_report": {
-      "$ref": "#/$defs/audit_report"
-    }
-  },
-  "required": [
-    "audit_report",
-    "artifacts"
-  ],
-  "title": "audit",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd b/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
deleted file mode 100644
index 14cd992..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/auto_discovery.mmd
+++ /dev/null
@@ -1,18 +0,0 @@
-%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
-  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
-  subgraph replication["replication (at replan)"]
-    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
-  end
-  class replication replan
-  discovery_synthesis["discovery_synthesis"]
-  cohort_assembly --> discovery_run
-  discovery_run --> replication__holdout_replication
-  cohort_assembly --> replication__holdout_replication
-  discovery_run --> discovery_synthesis
-  replication --> discovery_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json b/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
deleted file mode 100644
index 4866540..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/cohort_assembly.schema.json
+++ /dev/null
@@ -1,206 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "cohort": {
-      "additionalProperties": true,
-      "properties": {
-        "discovery_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "exclusion_criteria": {
-          "type": "string"
-        },
-        "holdout_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "id": {
-          "type": "string"
-        },
-        "inclusion_criteria": {
-          "type": "string"
-        },
-        "research_question": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source_data_sources": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "research_question",
-        "inclusion_criteria",
-        "exclusion_criteria",
-        "sampling",
-        "source_data_sources",
-        "discovery_subset",
-        "holdout_subset",
-        "run_id"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/cohort_assembly.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "cohort": {
-      "$ref": "#/$defs/cohort"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "cohort",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "cohort_assembly",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json b/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
deleted file mode 100644
index 0bec23c..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/data_acquisition.schema.json
+++ /dev/null
@@ -1,161 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "acquisition": {
-      "additionalProperties": true,
-      "properties": {
-        "access_status": {
-          "enum": [
-            "acquired",
-            "open_unfetched",
-            "restricted",
-            "not_found"
-          ]
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "dataset_id": {
-          "type": "string"
-        },
-        "local_path": {
-          "type": "string"
-        },
-        "validation_note": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "access_status",
-        "local_path",
-        "dataset_id",
-        "validation_note"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_acquisition.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "acquisitions": {
-      "items": {
-        "$ref": "#/$defs/acquisition"
-      },
-      "type": "array"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "acquisitions",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_acquisition",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
deleted file mode 100644
index cb56eed..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
+++ /dev/null
@@ -1,92 +0,0 @@
-%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  subgraph data_provenance["data_provenance [flow: data_provenance]"]
-    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    data_provenance__provenance_synthesis["provenance_synthesis"]
-  end
-  class data_provenance embed
-  subgraph reproduction["reproduction [flow: reproduction]"]
-    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-    reproduction__law_extraction["law_extraction"]
-    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-    subgraph reproduction__replication["replication (at replan)"]
-      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
-      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__adjudicate["adjudicate"]
-    end
-    class reproduction__replication replan
-    reproduction__reproduction_synthesis["reproduction_synthesis"]
-  end
-  class reproduction embed
-  subgraph theorizer["theorizer [flow: theorizer]"]
-    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    subgraph theorizer__theory_generation["theory_generation"]
-      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-    end
-    theorizer__testability_triage["testability_triage"]
-    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-    theorizer__theory_synthesis["theory_synthesis"]
-  end
-  class theorizer embed
-  subgraph verification["verification (at replan)"]
-    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__adjudicate["adjudicate"]
-  end
-  class verification replan
-  verification_synthesis["verification_synthesis"]
-  gap_synthesis["gap_synthesis"]
-  final_synthesis["final_synthesis"]
-  data_provenance__provenance_search --> data_provenance__provenance_extraction
-  data_provenance__provenance_search --> data_provenance__data_acquisition
-  data_provenance__provenance_extraction --> data_provenance__data_acquisition
-  data_provenance__provenance_search --> data_provenance__provenance_synthesis
-  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
-  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
-  reproduction__data_driven_discovery --> reproduction__law_extraction
-  reproduction__law_extraction --> reproduction__evidence_gathering
-  reproduction__law_extraction --> reproduction__replication__experiment_design
-  reproduction__evidence_gathering --> reproduction__replication__experiment_design
-  reproduction__replication__experiment_design --> reproduction__replication__analysis
-  reproduction__evidence_gathering --> reproduction__replication__analysis
-  reproduction__replication__analysis --> reproduction__replication__audit
-  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
-  reproduction__replication__analysis --> reproduction__replication__adjudicate
-  reproduction__replication__audit --> reproduction__replication__adjudicate
-  reproduction__law_extraction --> reproduction__reproduction_synthesis
-  reproduction__replication --> reproduction__reproduction_synthesis
-  reproduction__law_extraction --> theorizer__evidence_extraction
-  reproduction__replication__adjudicate --> theorizer__evidence_extraction
-  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
-  theorizer__theory_generation --> theorizer__testability_triage
-  reproduction__data_driven_discovery --> theorizer__testability_triage
-  reproduction__evidence_gathering --> theorizer__testability_triage
-  theorizer__testability_triage --> theorizer__novelty_assessment
-  theorizer__theory_generation --> theorizer__theory_synthesis
-  theorizer__novelty_assessment --> theorizer__theory_synthesis
-  theorizer__testability_triage --> theorizer__theory_synthesis
-  theorizer__testability_triage --> verification__analysis
-  reproduction__data_driven_discovery --> verification__analysis
-  reproduction__evidence_gathering --> verification__analysis
-  verification__analysis --> verification__audit
-  theorizer__testability_triage --> verification__adjudicate
-  verification__analysis --> verification__adjudicate
-  verification__audit --> verification__adjudicate
-  verification --> verification_synthesis
-  theorizer__novelty_assessment --> verification_synthesis
-  data_provenance__provenance_synthesis --> gap_synthesis
-  reproduction__reproduction_synthesis --> gap_synthesis
-  theorizer__theory_synthesis --> gap_synthesis
-  verification_synthesis --> gap_synthesis
-  data_provenance__provenance_synthesis --> final_synthesis
-  reproduction__reproduction_synthesis --> final_synthesis
-  theorizer__theory_synthesis --> final_synthesis
-  verification_synthesis --> final_synthesis
-  gap_synthesis --> final_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
deleted file mode 100644
index 14f65a7..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/data_driven_discovery.schema.json
+++ /dev/null
@@ -1,152 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_driven_discovery.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_driven_discovery",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd b/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
deleted file mode 100644
index 3b46977..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/data_provenance.mmd
+++ /dev/null
@@ -1,16 +0,0 @@
-%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-  provenance_synthesis["provenance_synthesis"]
-  provenance_search --> provenance_extraction
-  provenance_search --> data_acquisition
-  provenance_extraction --> data_acquisition
-  provenance_search --> provenance_synthesis
-  provenance_extraction --> provenance_synthesis
-  data_acquisition --> provenance_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json b/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
deleted file mode 100644
index b7ac259..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/discovery_run.schema.json
+++ /dev/null
@@ -1,170 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_run.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "discovery_run",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
deleted file mode 100644
index 29cb31f..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/discovery_synthesis.schema.json
+++ /dev/null
@@ -1,271 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "discovery_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "interpretation": {
-          "type": "string"
-        },
-        "laws": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "deciding_experiment": {
-                "type": "string"
-              },
-              "effect_size_discovery": {
-                "type": "string"
-              },
-              "effect_size_holdout": {
-                "type": "string"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "surprise": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "surprise",
-              "outcome",
-              "deciding_experiment",
-              "effect_size_discovery",
-              "effect_size_holdout"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "run_id",
-        "laws",
-        "interpretation",
-        "next_steps",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "discovery_report": {
-      "$ref": "#/$defs/discovery_report"
-    }
-  },
-  "required": [
-    "discovery_report",
-    "artifacts"
-  ],
-  "title": "discovery_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
deleted file mode 100644
index 7a53a5b..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/evidence_extraction.schema.json
+++ /dev/null
@@ -1,132 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "artifacts"
-  ],
-  "title": "evidence_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json b/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
deleted file mode 100644
index c310796..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/evidence_gathering.schema.json
+++ /dev/null
@@ -1,121 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_gathering.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "datasets",
-    "artifacts"
-  ],
-  "title": "evidence_gathering",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json b/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
deleted file mode 100644
index 458fe42..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/experiment_design.schema.json
+++ /dev/null
@@ -1,162 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "experiment_design": {
-      "additionalProperties": true,
-      "properties": {
-        "construct_equivalence": {
-          "enum": [
-            "equivalent",
-            "proxy",
-            "mismatch"
-          ]
-        },
-        "data_gap": {
-          "type": "string"
-        },
-        "experiment_design_query": {
-          "type": "string"
-        },
-        "experiment_name": {
-          "type": "string"
-        },
-        "feasibility": {
-          "enum": [
-            "feasible",
-            "proxy_only",
-            "data_unavailable",
-            "construct_mismatch"
-          ]
-        },
-        "independent_operationalization": {
-          "type": "string"
-        },
-        "plain_language_description": {
-          "type": "string"
-        },
-        "prespecified": {
-          "additionalProperties": true,
-          "properties": {
-            "metric": {
-              "type": "string"
-            },
-            "success_threshold": {
-              "type": "string"
-            },
-            "test": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "test",
-            "metric",
-            "success_threshold"
-          ],
-          "type": "object"
-        },
-        "required_data": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "experiment_name",
-        "plain_language_description",
-        "source_operationalization",
-        "independent_operationalization",
-        "construct_equivalence",
-        "feasibility",
-        "required_data",
-        "data_gap",
-        "experiment_design_query",
-        "prespecified"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/experiment_design.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "experiment_design": {
-      "$ref": "#/$defs/experiment_design"
-    }
-  },
-  "required": [
-    "experiment_design",
-    "artifacts"
-  ],
-  "title": "experiment_design",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
deleted file mode 100644
index b00f085..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/final_synthesis.schema.json
+++ /dev/null
@@ -1,289 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "research_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "inference_chain": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "chain": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "claim": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "claim",
-              "chain"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sub_reports": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "report_path",
-              "one_line"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "tensions_and_surprises": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "evidence": {
-                "type": "string"
-              },
-              "observation": {
-                "type": "string"
-              },
-              "where": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "observation",
-              "where",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_highlights": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "claim": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_was_done": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theory_highlights",
-        "inference_chain",
-        "what_was_done",
-        "sub_reports",
-        "tensions_and_surprises",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/final_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "research_report": {
-      "$ref": "#/$defs/research_report"
-    }
-  },
-  "required": [
-    "research_report",
-    "artifacts"
-  ],
-  "title": "final_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/flows.json b/plugins/asta/skills/research-step/assets/compiled/flows.json
deleted file mode 100644
index 907a432..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/flows.json
+++ /dev/null
@@ -1,6657 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "flows": {
-    "auto_discovery": {
-      "edges": [
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "discovery_run"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "discovery_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "discovery_synthesis"
-        }
-      ],
-      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta documents",
-            "asta generate-theories find-and-extract",
-            "asta autodiscovery create",
-            "asta autodiscovery upload",
-            "asta autodiscovery metadata"
-          ],
-          "id": "cohort_assembly",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
-          "name": "cohort_assembly",
-          "parent": null,
-          "replan": false,
-          "task": "cohort_assembly"
-        },
-        {
-          "chain": [
-            "asta autodiscovery submit",
-            "asta autodiscovery experiments"
-          ],
-          "id": "discovery_run",
-          "inputs": [
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
-          "name": "discovery_run",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_run"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__holdout_replication",
-          "inputs": [
-            "discovery_run",
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "holdout_replication",
-          "parent": "replication",
-          "replan": false,
-          "task": "holdout_replication"
-        },
-        {
-          "chain": [],
-          "id": "discovery_synthesis",
-          "inputs": [
-            "discovery_run",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
-          "name": "discovery_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_synthesis"
-        }
-      ]
-    },
-    "data_and_literature_grounded_theory_generation": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_provenance__data_acquisition",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "reproduction__law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "reproduction__replication__audit",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "reproduction__replication",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "adjudicate",
-          "source": "reproduction__replication__adjudicate",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "theorizer__evidence_extraction",
-          "target": "theorizer__theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__audit"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "verification__audit",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "verification",
-          "source": "verification",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "gap_synthesis",
-          "source": "gap_synthesis",
-          "target": "final_synthesis"
-        }
-      ],
-      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
-      "nodes": [
-        {
-          "id": "data_provenance",
-          "kind": "embed",
-          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
-          "name": "data_provenance",
-          "parent": null,
-          "replan": false,
-          "workflow": "data_provenance"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "data_provenance__provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "data_provenance__provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_provenance__data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "data_provenance__provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_synthesis"
-        },
-        {
-          "id": "reproduction",
-          "kind": "embed",
-          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
-          "name": "reproduction",
-          "parent": null,
-          "replan": false,
-          "workflow": "reproduction"
-        },
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "reproduction__data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "reproduction__evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "reproduction__replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": "reproduction",
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "reproduction__replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "reproduction_synthesis"
-        },
-        {
-          "id": "theorizer",
-          "kind": "embed",
-          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
-          "name": "theorizer",
-          "parent": null,
-          "replan": false,
-          "workflow": "theorizer"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "theorizer__evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theorizer__theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": "theorizer",
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theorizer__theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theorizer__theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "theorizer__novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "verification",
-          "kind": "group",
-          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
-          "name": "verification",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__analysis",
-          "inputs": [
-            "testability_triage",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "verification",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "verification",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "verification__adjudicate",
-          "inputs": [
-            "testability_triage",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
-          "name": "adjudicate",
-          "parent": "verification",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "verification_synthesis",
-          "inputs": [
-            "verification",
-            "novelty_assessment"
-          ],
-          "kind": "step",
-          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
-          "name": "verification_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "verification_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "gap_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
-          "name": "gap_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "gap_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "final_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis",
-            "gap_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
-          "name": "final_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "final_synthesis"
-        }
-      ]
-    },
-    "data_provenance": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_acquisition",
-          "target": "provenance_synthesis"
-        }
-      ],
-      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": null,
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_synthesis"
-        }
-      ]
-    },
-    "hypothesis_driven_research": {
-      "edges": [
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "hypothesis_formation"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "testing__data_acquisition",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "testing__audit",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "hypothesis_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testing",
-          "source": "testing",
-          "target": "hypothesis_synthesis"
-        }
-      ],
-      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "literature_review",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
-          "name": "literature_review",
-          "parent": null,
-          "replan": false,
-          "task": "literature_review"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "hypothesis_formation",
-          "inputs": [
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
-          "name": "hypothesis_formation",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_formation"
-        },
-        {
-          "id": "testing",
-          "kind": "group",
-          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
-          "name": "testing",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "testing__experiment_design",
-          "inputs": [
-            "hypothesis_formation",
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "testing",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "testing__data_acquisition",
-          "inputs": [
-            "experiment_design"
-          ],
-          "kind": "step",
-          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
-          "name": "data_acquisition",
-          "parent": "testing",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__analysis",
-          "inputs": [
-            "experiment_design",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "testing",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "testing",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "testing__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
-          "name": "adjudicate",
-          "parent": "testing",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "hypothesis_synthesis",
-          "inputs": [
-            "hypothesis_formation",
-            "testing"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
-          "name": "hypothesis_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_synthesis"
-        }
-      ]
-    },
-    "reproduction": {
-      "edges": [
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "data_driven_discovery",
-          "target": "law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "replication__audit",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "reproduction_synthesis"
-        }
-      ],
-      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.",
-      "nodes": [
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "reproduction_synthesis"
-        }
-      ]
-    },
-    "theorizer": {
-      "edges": [
-        {
-          "external": true,
-          "input": "law_extraction",
-          "source": "ext__law_extraction",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": true,
-          "input": "adjudicate",
-          "source": "ext__adjudicate",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "evidence_extraction",
-          "target": "theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "data_driven_discovery",
-          "source": "ext__data_driven_discovery",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "evidence_gathering",
-          "source": "ext__evidence_gathering",
-          "target": "testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "novelty_assessment",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "theory_synthesis"
-        }
-      ],
-      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.",
-      "nodes": [
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": null,
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": null,
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": null,
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "ext__adjudicate",
-          "kind": "external",
-          "mission": "",
-          "name": "adjudicate",
-          "parent": null,
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "id": "ext__data_driven_discovery",
-          "kind": "external",
-          "mission": "",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "id": "ext__evidence_gathering",
-          "kind": "external",
-          "mission": "",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "ext__law_extraction",
-          "kind": "external",
-          "mission": "",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        }
-      ]
-    }
-  },
-  "format_version": 1,
-  "schema_version": 2,
-  "tasks": {
-    "adjudicate": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/adjudicate.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "artifacts"
-        ],
-        "title": "adjudicate",
-        "type": "object"
-      }
-    },
-    "analysis": {
-      "output": {
-        "analysis": "analysis",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "analysis": {
-            "additionalProperties": true,
-            "properties": {
-              "assumptions": {
-                "type": "string"
-              },
-              "code": {
-                "type": "string"
-              },
-              "final_answer": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "final_answer",
-              "assumptions",
-              "code"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/analysis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "analysis": {
-            "$ref": "#/$defs/analysis"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "analysis",
-          "figures",
-          "artifacts"
-        ],
-        "title": "analysis",
-        "type": "object"
-      }
-    },
-    "audit": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "audit_report": "audit_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "audit_report": {
-            "additionalProperties": true,
-            "properties": {
-              "artifacts_found": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "challenges": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "check": {
-                      "type": "string"
-                    },
-                    "concern": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "concern",
-                    "check",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "recommended_adjustment": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "verdict_survives": {
-                "type": "boolean"
-              }
-            },
-            "required": [
-              "subject_id",
-              "challenges",
-              "artifacts_found",
-              "verdict_survives",
-              "recommended_adjustment"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/audit.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "audit_report": {
-            "$ref": "#/$defs/audit_report"
-          }
-        },
-        "required": [
-          "audit_report",
-          "artifacts"
-        ],
-        "title": "audit",
-        "type": "object"
-      }
-    },
-    "cohort_assembly": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "cohort": "cohort",
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "cohort": {
-            "additionalProperties": true,
-            "properties": {
-              "discovery_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "exclusion_criteria": {
-                "type": "string"
-              },
-              "holdout_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "id": {
-                "type": "string"
-              },
-              "inclusion_criteria": {
-                "type": "string"
-              },
-              "research_question": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source_data_sources": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "research_question",
-              "inclusion_criteria",
-              "exclusion_criteria",
-              "sampling",
-              "source_data_sources",
-              "discovery_subset",
-              "holdout_subset",
-              "run_id"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/cohort_assembly.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "cohort": {
-            "$ref": "#/$defs/cohort"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "cohort",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "cohort_assembly",
-        "type": "object"
-      }
-    },
-    "data_acquisition": {
-      "output": {
-        "acquisitions": [
-          "acquisition"
-        ],
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "acquisition": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "validation_note": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "access_status",
-              "local_path",
-              "dataset_id",
-              "validation_note"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_acquisition.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "acquisitions": {
-            "items": {
-              "$ref": "#/$defs/acquisition"
-            },
-            "type": "array"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "acquisitions",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_acquisition",
-        "type": "object"
-      }
-    },
-    "data_driven_discovery": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_driven_discovery.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_driven_discovery",
-        "type": "object"
-      }
-    },
-    "discovery_run": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_run.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "discovery_run",
-        "type": "object"
-      }
-    },
-    "discovery_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "discovery_report": "discovery_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "discovery_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "interpretation": {
-                "type": "string"
-              },
-              "laws": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "deciding_experiment": {
-                      "type": "string"
-                    },
-                    "effect_size_discovery": {
-                      "type": "string"
-                    },
-                    "effect_size_holdout": {
-                      "type": "string"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "surprise": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "surprise",
-                    "outcome",
-                    "deciding_experiment",
-                    "effect_size_discovery",
-                    "effect_size_holdout"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "run_id",
-              "laws",
-              "interpretation",
-              "next_steps",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "discovery_report": {
-            "$ref": "#/$defs/discovery_report"
-          }
-        },
-        "required": [
-          "discovery_report",
-          "artifacts"
-        ],
-        "title": "discovery_synthesis",
-        "type": "object"
-      }
-    },
-    "evidence_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "artifacts"
-        ],
-        "title": "evidence_extraction",
-        "type": "object"
-      }
-    },
-    "evidence_gathering": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_gathering.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "datasets",
-          "artifacts"
-        ],
-        "title": "evidence_gathering",
-        "type": "object"
-      }
-    },
-    "experiment_design": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "experiment_design": "experiment_design"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "experiment_design": {
-            "additionalProperties": true,
-            "properties": {
-              "construct_equivalence": {
-                "enum": [
-                  "equivalent",
-                  "proxy",
-                  "mismatch"
-                ]
-              },
-              "data_gap": {
-                "type": "string"
-              },
-              "experiment_design_query": {
-                "type": "string"
-              },
-              "experiment_name": {
-                "type": "string"
-              },
-              "feasibility": {
-                "enum": [
-                  "feasible",
-                  "proxy_only",
-                  "data_unavailable",
-                  "construct_mismatch"
-                ]
-              },
-              "independent_operationalization": {
-                "type": "string"
-              },
-              "plain_language_description": {
-                "type": "string"
-              },
-              "prespecified": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "experiment_name",
-              "plain_language_description",
-              "source_operationalization",
-              "independent_operationalization",
-              "construct_equivalence",
-              "feasibility",
-              "required_data",
-              "data_gap",
-              "experiment_design_query",
-              "prespecified"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/experiment_design.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "experiment_design": {
-            "$ref": "#/$defs/experiment_design"
-          }
-        },
-        "required": [
-          "experiment_design",
-          "artifacts"
-        ],
-        "title": "experiment_design",
-        "type": "object"
-      }
-    },
-    "final_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "research_report": "research_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "research_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "inference_chain": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "chain": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "claim": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "claim",
-                    "chain"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sub_reports": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "kind": {
-                      "type": "string"
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "report_path": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "kind",
-                    "report_path",
-                    "one_line"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "tensions_and_surprises": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "observation": {
-                      "type": "string"
-                    },
-                    "where": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "observation",
-                    "where",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_highlights": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "claim": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_was_done": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theory_highlights",
-              "inference_chain",
-              "what_was_done",
-              "sub_reports",
-              "tensions_and_surprises",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/final_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "research_report": {
-            "$ref": "#/$defs/research_report"
-          }
-        },
-        "required": [
-          "research_report",
-          "artifacts"
-        ],
-        "title": "final_synthesis",
-        "type": "object"
-      }
-    },
-    "gap_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_gaps_report": "data_gaps_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_gaps_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "arose_in": {
-                      "type": "string"
-                    },
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity",
-                    "arose_in"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "gaps",
-              "next_steps",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/gap_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_gaps_report": {
-            "$ref": "#/$defs/data_gaps_report"
-          }
-        },
-        "required": [
-          "data_gaps_report",
-          "artifacts"
-        ],
-        "title": "gap_synthesis",
-        "type": "object"
-      }
-    },
-    "holdout_replication": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/holdout_replication.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "figures",
-          "artifacts"
-        ],
-        "title": "holdout_replication",
-        "type": "object"
-      }
-    },
-    "hypothesis_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypotheses": [
-          "hypothesis"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "hypothesis": {
-            "additionalProperties": true,
-            "properties": {
-              "falsifiable_prediction": {
-                "type": "string"
-              },
-              "grounds": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "rationale": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "rationale",
-              "falsifiable_prediction",
-              "grounds"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypotheses": {
-            "items": {
-              "$ref": "#/$defs/hypothesis"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "hypotheses",
-          "artifacts"
-        ],
-        "title": "hypothesis_formation",
-        "type": "object"
-      }
-    },
-    "hypothesis_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypothesis_report": "hypothesis_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "hypothesis_report": {
-            "additionalProperties": true,
-            "properties": {
-              "answer": {
-                "type": "string"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "hypothesis_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "hypothesis_id",
-                    "statement",
-                    "outcome",
-                    "effect_size_observed",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_questions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "question": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "question",
-              "ledger",
-              "answer",
-              "open_questions",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypothesis_report": {
-            "$ref": "#/$defs/hypothesis_report"
-          }
-        },
-        "required": [
-          "hypothesis_report",
-          "artifacts"
-        ],
-        "title": "hypothesis_synthesis",
-        "type": "object"
-      }
-    },
-    "law_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/law_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "law_extraction",
-        "type": "object"
-      }
-    },
-    "literature_review": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "literature_review": "literature_review"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "literature_review": {
-            "additionalProperties": true,
-            "properties": {
-              "citations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "corpus_id": {
-                      "type": "number"
-                    },
-                    "id": {
-                      "type": "string"
-                    },
-                    "relevance": {
-                      "type": "string"
-                    },
-                    "title": {
-                      "type": "string"
-                    },
-                    "url": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "id",
-                    "corpus_id",
-                    "title",
-                    "url",
-                    "relevance"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "key_findings": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_gaps": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "summary": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "summary",
-              "key_findings",
-              "open_gaps",
-              "citations"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/literature_review.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "literature_review": {
-            "$ref": "#/$defs/literature_review"
-          }
-        },
-        "required": [
-          "literature_review",
-          "artifacts"
-        ],
-        "title": "literature_review",
-        "type": "object"
-      }
-    },
-    "novelty_assessment": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_evaluations": [
-          "theory_evaluation"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_evaluation": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "overall_support": {
-                "enum": [
-                  "supports",
-                  "mixed",
-                  "contradicts",
-                  "inconclusive"
-                ]
-              },
-              "overall_support_raw": {
-                "type": "string"
-              },
-              "statement_evaluations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "explanation": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "statement_index": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "statement_index",
-                    "novelty",
-                    "explanation"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "theory_id",
-              "novelty",
-              "overall_support",
-              "explanation",
-              "statement_evaluations"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/novelty_assessment.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_evaluations": {
-            "items": {
-              "$ref": "#/$defs/theory_evaluation"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theory_evaluations",
-          "artifacts"
-        ],
-        "title": "novelty_assessment",
-        "type": "object"
-      }
-    },
-    "provenance_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data",
-        "source_access": [
-          "source_access"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "source_access": {
-            "additionalProperties": true,
-            "properties": {
-              "data_availability": {
-                "type": "string"
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "identifier": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "data_availability",
-              "repository",
-              "identifier"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          },
-          "source_access": {
-            "items": {
-              "$ref": "#/$defs/source_access"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "source_access",
-          "artifacts"
-        ],
-        "title": "provenance_extraction",
-        "type": "object"
-      }
-    },
-    "provenance_search": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_sources": [
-          "data_source"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_source": {
-            "additionalProperties": true,
-            "properties": {
-              "dataset_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "dataset_id",
-              "paper_id",
-              "paper_title",
-              "paper_url"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_search.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_sources": {
-            "items": {
-              "$ref": "#/$defs/data_source"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "data_sources",
-          "artifacts"
-        ],
-        "title": "provenance_search",
-        "type": "object"
-      }
-    },
-    "provenance_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "provenance_report": "provenance_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "provenance_report": {
-            "additionalProperties": true,
-            "properties": {
-              "acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "not_acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sources": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "access_status": {
-                      "enum": [
-                        "acquired",
-                        "open_unfetched",
-                        "restricted",
-                        "not_found"
-                      ]
-                    },
-                    "dataset_id": {
-                      "type": "string"
-                    },
-                    "local_path": {
-                      "type": "string"
-                    },
-                    "paper_title": {
-                      "type": "string"
-                    },
-                    "paper_url": {
-                      "type": "string"
-                    },
-                    "repository": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "dataset_id",
-                    "paper_title",
-                    "paper_url",
-                    "repository",
-                    "access_status",
-                    "local_path"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "sources",
-              "method_note",
-              "acquired",
-              "not_acquired",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "provenance_report": {
-            "$ref": "#/$defs/provenance_report"
-          }
-        },
-        "required": [
-          "provenance_report",
-          "artifacts"
-        ],
-        "title": "provenance_synthesis",
-        "type": "object"
-      }
-    },
-    "reproduction_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "reproduction_report": "reproduction_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "reproduction_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "laws_ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "effect_size_source": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "independence_axes": {
-                      "items": {
-                        "enum": [
-                          "region",
-                          "instrument",
-                          "method",
-                          "construct",
-                          "temporal",
-                          "population"
-                        ]
-                      },
-                      "type": "array"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "testability": {
-                      "enum": [
-                        "tested",
-                        "proxy_only",
-                        "untestable"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "outcome",
-                    "testability",
-                    "effect_size_source",
-                    "effect_size_observed",
-                    "independence_axes",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_failed_or_untestable": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_held": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "method_note",
-              "laws_ledger",
-              "what_held",
-              "what_failed_or_untestable",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/reproduction_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "reproduction_report": {
-            "$ref": "#/$defs/reproduction_report"
-          }
-        },
-        "required": [
-          "reproduction_report",
-          "artifacts"
-        ],
-        "title": "reproduction_synthesis",
-        "type": "object"
-      }
-    },
-    "testability_triage": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "testability_triage": "testability_triage"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "testability_triage": {
-            "additionalProperties": true,
-            "properties": {
-              "assessments": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "available_data": {
-                      "type": "string"
-                    },
-                    "gap": {
-                      "type": "string"
-                    },
-                    "proposed_test": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "metric": {
-                          "type": "string"
-                        },
-                        "success_threshold": {
-                          "type": "string"
-                        },
-                        "test": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "test",
-                        "metric",
-                        "success_threshold"
-                      ],
-                      "type": "object"
-                    },
-                    "required_data": {
-                      "type": "string"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "testable_now",
-                    "available_data",
-                    "required_data",
-                    "proposed_test",
-                    "gap"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "testable_theory_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "assessments",
-              "testable_theory_ids"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/testability_triage.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "testability_triage": {
-            "$ref": "#/$defs/testability_triage"
-          }
-        },
-        "required": [
-          "testability_triage",
-          "artifacts"
-        ],
-        "title": "testability_triage",
-        "type": "object"
-      }
-    },
-    "theory_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theories": [
-          "theory"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory": {
-            "additionalProperties": true,
-            "properties": {
-              "components": {
-                "additionalProperties": true,
-                "properties": {
-                  "generation_objective": {
-                    "type": "string"
-                  },
-                  "new_predictions_likely": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "new_predictions_unknown": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statements": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "conflicting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "statement_name": {
-                          "type": "string"
-                        },
-                        "supporting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "theory_statement": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "statement_name",
-                        "theory_statement",
-                        "supporting_evidence",
-                        "conflicting_evidence"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "unaccounted_for": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "generation_objective",
-                  "theory_statements",
-                  "new_predictions_likely",
-                  "new_predictions_unknown",
-                  "unaccounted_for"
-                ],
-                "type": "object"
-              },
-              "description": {
-                "type": "string"
-              },
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "name": {
-                "type": "string"
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "theory_query": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "name",
-              "description",
-              "theory_query",
-              "objective",
-              "grounds_law_ids",
-              "supporting_evidence_ids",
-              "components"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theories": {
-            "items": {
-              "$ref": "#/$defs/theory"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theories",
-          "artifacts"
-        ],
-        "title": "theory_formation",
-        "type": "object"
-      }
-    },
-    "theory_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_report": "theory_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "new_predictions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "novelty_summary": {
-                "type": "string"
-              },
-              "open_threads": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "theories": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "grounds_law_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "name": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "objective": {
-                      "enum": [
-                        "accuracy_focused",
-                        "novelty_focused"
-                      ]
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "supporting_evidence_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "name",
-                    "objective",
-                    "one_line",
-                    "grounds_law_ids",
-                    "novelty",
-                    "testable_now",
-                    "supporting_evidence_ids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theories",
-              "novelty_summary",
-              "new_predictions",
-              "open_threads",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_report": {
-            "$ref": "#/$defs/theory_report"
-          }
-        },
-        "required": [
-          "theory_report",
-          "artifacts"
-        ],
-        "title": "theory_synthesis",
-        "type": "object"
-      }
-    },
-    "verification_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "verification_report": "verification_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "verification_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "novelty_by_verification": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "audit_survived": {
-                      "type": "boolean"
-                    },
-                    "claim": {
-                      "type": "string"
-                    },
-                    "data_used": {
-                      "type": "string"
-                    },
-                    "effect_size": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome",
-                    "effect_size",
-                    "data_used",
-                    "audit_survived"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_could_not_be_tested": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_was_tested": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "novelty_by_verification",
-              "what_was_tested",
-              "what_could_not_be_tested",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/verification_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "verification_report": {
-            "$ref": "#/$defs/verification_report"
-          }
-        },
-        "required": [
-          "verification_report",
-          "artifacts"
-        ],
-        "title": "verification_synthesis",
-        "type": "object"
-      }
-    }
-  }
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
deleted file mode 100644
index 760fbb5..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/gap_synthesis.schema.json
+++ /dev/null
@@ -1,221 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_gaps_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "arose_in": {
-                "type": "string"
-              },
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity",
-              "arose_in"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "gaps",
-        "next_steps",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/gap_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_gaps_report": {
-      "$ref": "#/$defs/data_gaps_report"
-    }
-  },
-  "required": [
-    "data_gaps_report",
-    "artifacts"
-  ],
-  "title": "gap_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json b/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
deleted file mode 100644
index 9d18252..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/holdout_replication.schema.json
+++ /dev/null
@@ -1,167 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/holdout_replication.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "figures",
-    "artifacts"
-  ],
-  "title": "holdout_replication",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
deleted file mode 100644
index e996ef7..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  literature_review["literature_review<br/>asta literature find · asta papers search"]
-  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph testing["testing (at replan)"]
-    testing__experiment_design["experiment_design<br/>asta experiment"]
-    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__adjudicate["adjudicate"]
-  end
-  class testing replan
-  hypothesis_synthesis["hypothesis_synthesis"]
-  literature_review --> hypothesis_formation
-  hypothesis_formation --> testing__experiment_design
-  literature_review --> testing__experiment_design
-  testing__experiment_design --> testing__data_acquisition
-  testing__experiment_design --> testing__analysis
-  testing__data_acquisition --> testing__analysis
-  testing__analysis --> testing__audit
-  testing__experiment_design --> testing__adjudicate
-  testing__analysis --> testing__adjudicate
-  testing__audit --> testing__adjudicate
-  hypothesis_formation --> hypothesis_synthesis
-  testing --> hypothesis_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
deleted file mode 100644
index 694d94f..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/hypothesis_formation.schema.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "hypothesis": {
-      "additionalProperties": true,
-      "properties": {
-        "falsifiable_prediction": {
-          "type": "string"
-        },
-        "grounds": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "rationale": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "rationale",
-        "falsifiable_prediction",
-        "grounds"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypotheses": {
-      "items": {
-        "$ref": "#/$defs/hypothesis"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "hypotheses",
-    "artifacts"
-  ],
-  "title": "hypothesis_formation",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
deleted file mode 100644
index b2fe767..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
+++ /dev/null
@@ -1,224 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "hypothesis_report": {
-      "additionalProperties": true,
-      "properties": {
-        "answer": {
-          "type": "string"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "hypothesis_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "hypothesis_id",
-              "statement",
-              "outcome",
-              "effect_size_observed",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_questions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "question": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "question",
-        "ledger",
-        "answer",
-        "open_questions",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypothesis_report": {
-      "$ref": "#/$defs/hypothesis_report"
-    }
-  },
-  "required": [
-    "hypothesis_report",
-    "artifacts"
-  ],
-  "title": "hypothesis_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
deleted file mode 100644
index 7b3e1fc..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/law_extraction.schema.json
+++ /dev/null
@@ -1,139 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/law_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "law_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json b/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
deleted file mode 100644
index 14df7b7..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/literature_review.schema.json
+++ /dev/null
@@ -1,150 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "literature_review": {
-      "additionalProperties": true,
-      "properties": {
-        "citations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "corpus_id": {
-                "type": "number"
-              },
-              "id": {
-                "type": "string"
-              },
-              "relevance": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "corpus_id",
-              "title",
-              "url",
-              "relevance"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "key_findings": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_gaps": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "summary": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "summary",
-        "key_findings",
-        "open_gaps",
-        "citations"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/literature_review.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "literature_review": {
-      "$ref": "#/$defs/literature_review"
-    }
-  },
-  "required": [
-    "literature_review",
-    "artifacts"
-  ],
-  "title": "literature_review",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json b/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
deleted file mode 100644
index 729f9fe..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/novelty_assessment.schema.json
+++ /dev/null
@@ -1,147 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_evaluation": {
-      "additionalProperties": true,
-      "properties": {
-        "explanation": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "novelty": {
-          "enum": [
-            "established",
-            "derivable",
-            "genuinely_new"
-          ]
-        },
-        "overall_support": {
-          "enum": [
-            "supports",
-            "mixed",
-            "contradicts",
-            "inconclusive"
-          ]
-        },
-        "overall_support_raw": {
-          "type": "string"
-        },
-        "statement_evaluations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "statement_index": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "statement_index",
-              "novelty",
-              "explanation"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "theory_id",
-        "novelty",
-        "overall_support",
-        "explanation",
-        "statement_evaluations"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/novelty_assessment.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_evaluations": {
-      "items": {
-        "$ref": "#/$defs/theory_evaluation"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theory_evaluations",
-    "artifacts"
-  ],
-  "title": "novelty_assessment",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
deleted file mode 100644
index 2bd4ea8..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/provenance_extraction.schema.json
+++ /dev/null
@@ -1,163 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "source_access": {
-      "additionalProperties": true,
-      "properties": {
-        "data_availability": {
-          "type": "string"
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "identifier": {
-          "type": "string"
-        },
-        "repository": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "data_availability",
-        "repository",
-        "identifier"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    },
-    "source_access": {
-      "items": {
-        "$ref": "#/$defs/source_access"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "source_access",
-    "artifacts"
-  ],
-  "title": "provenance_extraction",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
deleted file mode 100644
index 8a924d9..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/provenance_search.schema.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_source": {
-      "additionalProperties": true,
-      "properties": {
-        "dataset_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "paper_title": {
-          "type": "string"
-        },
-        "paper_url": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "dataset_id",
-        "paper_id",
-        "paper_title",
-        "paper_url"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_search.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_sources": {
-      "items": {
-        "$ref": "#/$defs/data_source"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "data_sources",
-    "artifacts"
-  ],
-  "title": "provenance_search",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
deleted file mode 100644
index 0d43a6f..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/provenance_synthesis.schema.json
+++ /dev/null
@@ -1,230 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "provenance_report": {
-      "additionalProperties": true,
-      "properties": {
-        "acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "not_acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sources": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "dataset_id",
-              "paper_title",
-              "paper_url",
-              "repository",
-              "access_status",
-              "local_path"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "sources",
-        "method_note",
-        "acquired",
-        "not_acquired",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "provenance_report": {
-      "$ref": "#/$defs/provenance_report"
-    }
-  },
-  "required": [
-    "provenance_report",
-    "artifacts"
-  ],
-  "title": "provenance_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd b/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
deleted file mode 100644
index 4bb9e6e..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/reproduction.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-  law_extraction["law_extraction"]
-  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-  subgraph replication["replication (at replan)"]
-    replication__experiment_design["experiment_design<br/>asta experiment"]
-    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__adjudicate["adjudicate"]
-  end
-  class replication replan
-  reproduction_synthesis["reproduction_synthesis"]
-  data_driven_discovery --> law_extraction
-  law_extraction --> evidence_gathering
-  law_extraction --> replication__experiment_design
-  evidence_gathering --> replication__experiment_design
-  replication__experiment_design --> replication__analysis
-  evidence_gathering --> replication__analysis
-  replication__analysis --> replication__audit
-  replication__experiment_design --> replication__adjudicate
-  replication__analysis --> replication__adjudicate
-  replication__audit --> replication__adjudicate
-  law_extraction --> reproduction_synthesis
-  replication --> reproduction_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
deleted file mode 100644
index 570e076..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
+++ /dev/null
@@ -1,253 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "reproduction_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "laws_ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "outcome",
-              "testability",
-              "effect_size_source",
-              "effect_size_observed",
-              "independence_axes",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_failed_or_untestable": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_held": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "method_note",
-        "laws_ledger",
-        "what_held",
-        "what_failed_or_untestable",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/reproduction_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "reproduction_report": {
-      "$ref": "#/$defs/reproduction_report"
-    }
-  },
-  "required": [
-    "reproduction_report",
-    "artifacts"
-  ],
-  "title": "reproduction_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json b/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
deleted file mode 100644
index 8968920..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/testability_triage.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "testability_triage": {
-      "additionalProperties": true,
-      "properties": {
-        "assessments": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "available_data": {
-                "type": "string"
-              },
-              "gap": {
-                "type": "string"
-              },
-              "proposed_test": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "testable_now",
-              "available_data",
-              "required_data",
-              "proposed_test",
-              "gap"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "testable_theory_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "assessments",
-        "testable_theory_ids"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/testability_triage.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "testability_triage": {
-      "$ref": "#/$defs/testability_triage"
-    }
-  },
-  "required": [
-    "testability_triage",
-    "artifacts"
-  ],
-  "title": "testability_triage",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd b/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
deleted file mode 100644
index 59e2d0f..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/theorizer.mmd
+++ /dev/null
@@ -1,27 +0,0 @@
-%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph theory_generation["theory_generation"]
-    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-  end
-  testability_triage["testability_triage"]
-  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-  theory_synthesis["theory_synthesis"]
-  ext__adjudicate(["adjudicate (external)"]):::external
-  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
-  ext__evidence_gathering(["evidence_gathering (external)"]):::external
-  ext__law_extraction(["law_extraction (external)"]):::external
-  ext__law_extraction -.-> evidence_extraction
-  ext__adjudicate -.-> evidence_extraction
-  evidence_extraction --> theory_generation__theory_formation
-  theory_generation --> testability_triage
-  ext__data_driven_discovery -.-> testability_triage
-  ext__evidence_gathering -.-> testability_triage
-  testability_triage --> novelty_assessment
-  theory_generation --> theory_synthesis
-  novelty_assessment --> theory_synthesis
-  testability_triage --> theory_synthesis
diff --git a/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json b/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
deleted file mode 100644
index 7373cec..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/theory_formation.schema.json
+++ /dev/null
@@ -1,240 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory": {
-      "additionalProperties": true,
-      "properties": {
-        "components": {
-          "additionalProperties": true,
-          "properties": {
-            "generation_objective": {
-              "type": "string"
-            },
-            "new_predictions_likely": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "new_predictions_unknown": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "theory_statements": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "statement_name": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statement": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "statement_name",
-                  "theory_statement",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            },
-            "unaccounted_for": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "text": {
-                    "type": "string"
-                  },
-                  "uuids": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "text",
-                  "uuids"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "generation_objective",
-            "theory_statements",
-            "new_predictions_likely",
-            "new_predictions_unknown",
-            "unaccounted_for"
-          ],
-          "type": "object"
-        },
-        "description": {
-          "type": "string"
-        },
-        "grounds_law_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        },
-        "objective": {
-          "enum": [
-            "accuracy_focused",
-            "novelty_focused"
-          ]
-        },
-        "supporting_evidence_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "theory_query": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "name",
-        "description",
-        "theory_query",
-        "objective",
-        "grounds_law_ids",
-        "supporting_evidence_ids",
-        "components"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theories": {
-      "items": {
-        "$ref": "#/$defs/theory"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theories",
-    "artifacts"
-  ],
-  "title": "theory_formation",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
deleted file mode 100644
index dd2768e..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/theory_synthesis.schema.json
+++ /dev/null
@@ -1,280 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "new_predictions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "novelty_summary": {
-          "type": "string"
-        },
-        "open_threads": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "theories": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "name": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "name",
-              "objective",
-              "one_line",
-              "grounds_law_ids",
-              "novelty",
-              "testable_now",
-              "supporting_evidence_ids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theories",
-        "novelty_summary",
-        "new_predictions",
-        "open_threads",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_report": {
-      "$ref": "#/$defs/theory_report"
-    }
-  },
-  "required": [
-    "theory_report",
-    "artifacts"
-  ],
-  "title": "theory_synthesis",
-  "type": "object"
-}
diff --git a/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json b/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
deleted file mode 100644
index 8d1a639..0000000
--- a/plugins/asta/skills/research-step/assets/compiled/verification_synthesis.schema.json
+++ /dev/null
@@ -1,232 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "verification_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "novelty_by_verification": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "audit_survived": {
-                "type": "boolean"
-              },
-              "claim": {
-                "type": "string"
-              },
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome",
-              "effect_size",
-              "data_used",
-              "audit_survived"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_could_not_be_tested": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_was_tested": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "novelty_by_verification",
-        "what_was_tested",
-        "what_could_not_be_tested",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/verification_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "verification_report": {
-      "$ref": "#/$defs/verification_report"
-    }
-  },
-  "required": [
-    "verification_report",
-    "artifacts"
-  ],
-  "title": "verification_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/adjudicate.schema.json b/skills/research-step/assets/compiled/adjudicate.schema.json
deleted file mode 100644
index ccfb9d1..0000000
--- a/skills/research-step/assets/compiled/adjudicate.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/adjudicate.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "artifacts"
-  ],
-  "title": "adjudicate",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/analysis.schema.json b/skills/research-step/assets/compiled/analysis.schema.json
deleted file mode 100644
index 55e557d..0000000
--- a/skills/research-step/assets/compiled/analysis.schema.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "analysis": {
-      "additionalProperties": true,
-      "properties": {
-        "assumptions": {
-          "type": "string"
-        },
-        "code": {
-          "type": "string"
-        },
-        "final_answer": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "final_answer",
-        "assumptions",
-        "code"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/analysis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "analysis": {
-      "$ref": "#/$defs/analysis"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "analysis",
-    "figures",
-    "artifacts"
-  ],
-  "title": "analysis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/audit.schema.json b/skills/research-step/assets/compiled/audit.schema.json
deleted file mode 100644
index ca21120..0000000
--- a/skills/research-step/assets/compiled/audit.schema.json
+++ /dev/null
@@ -1,127 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "audit_report": {
-      "additionalProperties": true,
-      "properties": {
-        "artifacts_found": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "challenges": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "check": {
-                "type": "string"
-              },
-              "concern": {
-                "type": "string"
-              },
-              "outcome": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "concern",
-              "check",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "recommended_adjustment": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "verdict_survives": {
-          "type": "boolean"
-        }
-      },
-      "required": [
-        "subject_id",
-        "challenges",
-        "artifacts_found",
-        "verdict_survives",
-        "recommended_adjustment"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/audit.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "audit_report": {
-      "$ref": "#/$defs/audit_report"
-    }
-  },
-  "required": [
-    "audit_report",
-    "artifacts"
-  ],
-  "title": "audit",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/auto_discovery.mmd b/skills/research-step/assets/compiled/auto_discovery.mmd
deleted file mode 100644
index 14cd992..0000000
--- a/skills/research-step/assets/compiled/auto_discovery.mmd
+++ /dev/null
@@ -1,18 +0,0 @@
-%% auto_discovery — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  cohort_assembly["cohort_assembly<br/>asta literature find · asta documents · asta generate-theories find-and-extract · asta autodiscovery create · asta autodiscovery upload · asta autodiscovery metadata"]
-  discovery_run["discovery_run<br/>asta autodiscovery submit · asta autodiscovery experiments"]
-  subgraph replication["replication (at replan)"]
-    replication__holdout_replication["holdout_replication<br/>asta analyze-data submit · asta analyze-data poll"]
-  end
-  class replication replan
-  discovery_synthesis["discovery_synthesis"]
-  cohort_assembly --> discovery_run
-  discovery_run --> replication__holdout_replication
-  cohort_assembly --> replication__holdout_replication
-  discovery_run --> discovery_synthesis
-  replication --> discovery_synthesis
diff --git a/skills/research-step/assets/compiled/cohort_assembly.schema.json b/skills/research-step/assets/compiled/cohort_assembly.schema.json
deleted file mode 100644
index 4866540..0000000
--- a/skills/research-step/assets/compiled/cohort_assembly.schema.json
+++ /dev/null
@@ -1,206 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "cohort": {
-      "additionalProperties": true,
-      "properties": {
-        "discovery_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "exclusion_criteria": {
-          "type": "string"
-        },
-        "holdout_subset": {
-          "additionalProperties": true,
-          "properties": {
-            "definition": {
-              "type": "string"
-            },
-            "n": {
-              "type": "number"
-            },
-            "path": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "definition",
-            "n",
-            "path"
-          ],
-          "type": "object"
-        },
-        "id": {
-          "type": "string"
-        },
-        "inclusion_criteria": {
-          "type": "string"
-        },
-        "research_question": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source_data_sources": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "research_question",
-        "inclusion_criteria",
-        "exclusion_criteria",
-        "sampling",
-        "source_data_sources",
-        "discovery_subset",
-        "holdout_subset",
-        "run_id"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/cohort_assembly.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "cohort": {
-      "$ref": "#/$defs/cohort"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "cohort",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "cohort_assembly",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/data_acquisition.schema.json b/skills/research-step/assets/compiled/data_acquisition.schema.json
deleted file mode 100644
index 0bec23c..0000000
--- a/skills/research-step/assets/compiled/data_acquisition.schema.json
+++ /dev/null
@@ -1,161 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "acquisition": {
-      "additionalProperties": true,
-      "properties": {
-        "access_status": {
-          "enum": [
-            "acquired",
-            "open_unfetched",
-            "restricted",
-            "not_found"
-          ]
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "dataset_id": {
-          "type": "string"
-        },
-        "local_path": {
-          "type": "string"
-        },
-        "validation_note": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "access_status",
-        "local_path",
-        "dataset_id",
-        "validation_note"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_acquisition.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "acquisitions": {
-      "items": {
-        "$ref": "#/$defs/acquisition"
-      },
-      "type": "array"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "acquisitions",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_acquisition",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
deleted file mode 100644
index cb56eed..0000000
--- a/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
+++ /dev/null
@@ -1,92 +0,0 @@
-%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  subgraph data_provenance["data_provenance [flow: data_provenance]"]
-    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    data_provenance__provenance_synthesis["provenance_synthesis"]
-  end
-  class data_provenance embed
-  subgraph reproduction["reproduction [flow: reproduction]"]
-    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-    reproduction__law_extraction["law_extraction"]
-    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-    subgraph reproduction__replication["replication (at replan)"]
-      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
-      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-      reproduction__replication__adjudicate["adjudicate"]
-    end
-    class reproduction__replication replan
-    reproduction__reproduction_synthesis["reproduction_synthesis"]
-  end
-  class reproduction embed
-  subgraph theorizer["theorizer [flow: theorizer]"]
-    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-    subgraph theorizer__theory_generation["theory_generation"]
-      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-    end
-    theorizer__testability_triage["testability_triage"]
-    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-    theorizer__theory_synthesis["theory_synthesis"]
-  end
-  class theorizer embed
-  subgraph verification["verification (at replan)"]
-    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    verification__adjudicate["adjudicate"]
-  end
-  class verification replan
-  verification_synthesis["verification_synthesis"]
-  gap_synthesis["gap_synthesis"]
-  final_synthesis["final_synthesis"]
-  data_provenance__provenance_search --> data_provenance__provenance_extraction
-  data_provenance__provenance_search --> data_provenance__data_acquisition
-  data_provenance__provenance_extraction --> data_provenance__data_acquisition
-  data_provenance__provenance_search --> data_provenance__provenance_synthesis
-  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
-  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
-  reproduction__data_driven_discovery --> reproduction__law_extraction
-  reproduction__law_extraction --> reproduction__evidence_gathering
-  reproduction__law_extraction --> reproduction__replication__experiment_design
-  reproduction__evidence_gathering --> reproduction__replication__experiment_design
-  reproduction__replication__experiment_design --> reproduction__replication__analysis
-  reproduction__evidence_gathering --> reproduction__replication__analysis
-  reproduction__replication__analysis --> reproduction__replication__audit
-  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
-  reproduction__replication__analysis --> reproduction__replication__adjudicate
-  reproduction__replication__audit --> reproduction__replication__adjudicate
-  reproduction__law_extraction --> reproduction__reproduction_synthesis
-  reproduction__replication --> reproduction__reproduction_synthesis
-  reproduction__law_extraction --> theorizer__evidence_extraction
-  reproduction__replication__adjudicate --> theorizer__evidence_extraction
-  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
-  theorizer__theory_generation --> theorizer__testability_triage
-  reproduction__data_driven_discovery --> theorizer__testability_triage
-  reproduction__evidence_gathering --> theorizer__testability_triage
-  theorizer__testability_triage --> theorizer__novelty_assessment
-  theorizer__theory_generation --> theorizer__theory_synthesis
-  theorizer__novelty_assessment --> theorizer__theory_synthesis
-  theorizer__testability_triage --> theorizer__theory_synthesis
-  theorizer__testability_triage --> verification__analysis
-  reproduction__data_driven_discovery --> verification__analysis
-  reproduction__evidence_gathering --> verification__analysis
-  verification__analysis --> verification__audit
-  theorizer__testability_triage --> verification__adjudicate
-  verification__analysis --> verification__adjudicate
-  verification__audit --> verification__adjudicate
-  verification --> verification_synthesis
-  theorizer__novelty_assessment --> verification_synthesis
-  data_provenance__provenance_synthesis --> gap_synthesis
-  reproduction__reproduction_synthesis --> gap_synthesis
-  theorizer__theory_synthesis --> gap_synthesis
-  verification_synthesis --> gap_synthesis
-  data_provenance__provenance_synthesis --> final_synthesis
-  reproduction__reproduction_synthesis --> final_synthesis
-  theorizer__theory_synthesis --> final_synthesis
-  verification_synthesis --> final_synthesis
-  gap_synthesis --> final_synthesis
diff --git a/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/skills/research-step/assets/compiled/data_driven_discovery.schema.json
deleted file mode 100644
index 14f65a7..0000000
--- a/skills/research-step/assets/compiled/data_driven_discovery.schema.json
+++ /dev/null
@@ -1,152 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/data_driven_discovery.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "datasets",
-    "artifacts"
-  ],
-  "title": "data_driven_discovery",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/data_provenance.mmd b/skills/research-step/assets/compiled/data_provenance.mmd
deleted file mode 100644
index 3b46977..0000000
--- a/skills/research-step/assets/compiled/data_provenance.mmd
+++ /dev/null
@@ -1,16 +0,0 @@
-%% data_provenance — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  provenance_search["provenance_search<br/>asta literature find · asta papers search"]
-  provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-  provenance_synthesis["provenance_synthesis"]
-  provenance_search --> provenance_extraction
-  provenance_search --> data_acquisition
-  provenance_extraction --> data_acquisition
-  provenance_search --> provenance_synthesis
-  provenance_extraction --> provenance_synthesis
-  data_acquisition --> provenance_synthesis
diff --git a/skills/research-step/assets/compiled/discovery_run.schema.json b/skills/research-step/assets/compiled/discovery_run.schema.json
deleted file mode 100644
index b7ac259..0000000
--- a/skills/research-step/assets/compiled/discovery_run.schema.json
+++ /dev/null
@@ -1,170 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "experiment": {
-      "additionalProperties": true,
-      "properties": {
-        "analysis": {
-          "type": "string"
-        },
-        "experiment_id": {
-          "type": "string"
-        },
-        "hypothesis": {
-          "type": "string"
-        },
-        "status": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "experiment_id",
-        "status",
-        "hypothesis",
-        "analysis"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_run.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    },
-    "experiments": {
-      "items": {
-        "$ref": "#/$defs/experiment"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "experiments",
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "discovery_run",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/skills/research-step/assets/compiled/discovery_synthesis.schema.json
deleted file mode 100644
index 29cb31f..0000000
--- a/skills/research-step/assets/compiled/discovery_synthesis.schema.json
+++ /dev/null
@@ -1,271 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "discovery_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "interpretation": {
-          "type": "string"
-        },
-        "laws": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "deciding_experiment": {
-                "type": "string"
-              },
-              "effect_size_discovery": {
-                "type": "string"
-              },
-              "effect_size_holdout": {
-                "type": "string"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "surprise": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "surprise",
-              "outcome",
-              "deciding_experiment",
-              "effect_size_discovery",
-              "effect_size_holdout"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "run_id": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "run_id",
-        "laws",
-        "interpretation",
-        "next_steps",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/discovery_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "discovery_report": {
-      "$ref": "#/$defs/discovery_report"
-    }
-  },
-  "required": [
-    "discovery_report",
-    "artifacts"
-  ],
-  "title": "discovery_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/evidence_extraction.schema.json b/skills/research-step/assets/compiled/evidence_extraction.schema.json
deleted file mode 100644
index 7a53a5b..0000000
--- a/skills/research-step/assets/compiled/evidence_extraction.schema.json
+++ /dev/null
@@ -1,132 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "artifacts"
-  ],
-  "title": "evidence_extraction",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/evidence_gathering.schema.json b/skills/research-step/assets/compiled/evidence_gathering.schema.json
deleted file mode 100644
index c310796..0000000
--- a/skills/research-step/assets/compiled/evidence_gathering.schema.json
+++ /dev/null
@@ -1,121 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "dataset": {
-      "additionalProperties": true,
-      "properties": {
-        "covers_laws": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "definition": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "n": {
-          "type": "number"
-        },
-        "sampling": {
-          "type": "string"
-        },
-        "source": {
-          "type": "string"
-        },
-        "variables": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "id",
-        "definition",
-        "source",
-        "n",
-        "sampling",
-        "variables",
-        "covers_laws"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/evidence_gathering.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "datasets": {
-      "items": {
-        "$ref": "#/$defs/dataset"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "datasets",
-    "artifacts"
-  ],
-  "title": "evidence_gathering",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/experiment_design.schema.json b/skills/research-step/assets/compiled/experiment_design.schema.json
deleted file mode 100644
index 458fe42..0000000
--- a/skills/research-step/assets/compiled/experiment_design.schema.json
+++ /dev/null
@@ -1,162 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "experiment_design": {
-      "additionalProperties": true,
-      "properties": {
-        "construct_equivalence": {
-          "enum": [
-            "equivalent",
-            "proxy",
-            "mismatch"
-          ]
-        },
-        "data_gap": {
-          "type": "string"
-        },
-        "experiment_design_query": {
-          "type": "string"
-        },
-        "experiment_name": {
-          "type": "string"
-        },
-        "feasibility": {
-          "enum": [
-            "feasible",
-            "proxy_only",
-            "data_unavailable",
-            "construct_mismatch"
-          ]
-        },
-        "independent_operationalization": {
-          "type": "string"
-        },
-        "plain_language_description": {
-          "type": "string"
-        },
-        "prespecified": {
-          "additionalProperties": true,
-          "properties": {
-            "metric": {
-              "type": "string"
-            },
-            "success_threshold": {
-              "type": "string"
-            },
-            "test": {
-              "type": "string"
-            }
-          },
-          "required": [
-            "test",
-            "metric",
-            "success_threshold"
-          ],
-          "type": "object"
-        },
-        "required_data": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "experiment_name",
-        "plain_language_description",
-        "source_operationalization",
-        "independent_operationalization",
-        "construct_equivalence",
-        "feasibility",
-        "required_data",
-        "data_gap",
-        "experiment_design_query",
-        "prespecified"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/experiment_design.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "experiment_design": {
-      "$ref": "#/$defs/experiment_design"
-    }
-  },
-  "required": [
-    "experiment_design",
-    "artifacts"
-  ],
-  "title": "experiment_design",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/final_synthesis.schema.json b/skills/research-step/assets/compiled/final_synthesis.schema.json
deleted file mode 100644
index b00f085..0000000
--- a/skills/research-step/assets/compiled/final_synthesis.schema.json
+++ /dev/null
@@ -1,289 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "research_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "inference_chain": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "chain": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "claim": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "claim",
-              "chain"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sub_reports": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "report_path",
-              "one_line"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "tensions_and_surprises": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "evidence": {
-                "type": "string"
-              },
-              "observation": {
-                "type": "string"
-              },
-              "where": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "observation",
-              "where",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_highlights": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "claim": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_was_done": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theory_highlights",
-        "inference_chain",
-        "what_was_done",
-        "sub_reports",
-        "tensions_and_surprises",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/final_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "research_report": {
-      "$ref": "#/$defs/research_report"
-    }
-  },
-  "required": [
-    "research_report",
-    "artifacts"
-  ],
-  "title": "final_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/flows.json b/skills/research-step/assets/compiled/flows.json
deleted file mode 100644
index 907a432..0000000
--- a/skills/research-step/assets/compiled/flows.json
+++ /dev/null
@@ -1,6657 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "flows": {
-    "auto_discovery": {
-      "edges": [
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "discovery_run"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "cohort_assembly",
-          "source": "cohort_assembly",
-          "target": "replication__holdout_replication"
-        },
-        {
-          "external": false,
-          "input": "discovery_run",
-          "source": "discovery_run",
-          "target": "discovery_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "discovery_synthesis"
-        }
-      ],
-      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta documents",
-            "asta generate-theories find-and-extract",
-            "asta autodiscovery create",
-            "asta autodiscovery upload",
-            "asta autodiscovery metadata"
-          ],
-          "id": "cohort_assembly",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
-          "name": "cohort_assembly",
-          "parent": null,
-          "replan": false,
-          "task": "cohort_assembly"
-        },
-        {
-          "chain": [
-            "asta autodiscovery submit",
-            "asta autodiscovery experiments"
-          ],
-          "id": "discovery_run",
-          "inputs": [
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
-          "name": "discovery_run",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_run"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__holdout_replication",
-          "inputs": [
-            "discovery_run",
-            "cohort_assembly"
-          ],
-          "kind": "step",
-          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "holdout_replication",
-          "parent": "replication",
-          "replan": false,
-          "task": "holdout_replication"
-        },
-        {
-          "chain": [],
-          "id": "discovery_synthesis",
-          "inputs": [
-            "discovery_run",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
-          "name": "discovery_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "discovery_synthesis"
-        }
-      ]
-    },
-    "data_and_literature_grounded_theory_generation": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "data_provenance__provenance_search",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "data_provenance__provenance_extraction",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_provenance__data_acquisition",
-          "target": "data_provenance__provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "reproduction__law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "reproduction__replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "reproduction__replication__experiment_design",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "reproduction__replication__analysis",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "reproduction__replication__audit",
-          "target": "reproduction__replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "reproduction__replication",
-          "target": "reproduction__reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "reproduction__law_extraction",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "adjudicate",
-          "source": "reproduction__replication__adjudicate",
-          "target": "theorizer__evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "theorizer__evidence_extraction",
-          "target": "theorizer__theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "theorizer__testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theorizer__theory_generation",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "theorizer__theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "reproduction__data_driven_discovery",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "reproduction__evidence_gathering",
-          "target": "verification__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__audit"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "theorizer__testability_triage",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "verification__analysis",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "verification__audit",
-          "target": "verification__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "verification",
-          "source": "verification",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "theorizer__novelty_assessment",
-          "target": "verification_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "gap_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_synthesis",
-          "source": "data_provenance__provenance_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "reproduction_synthesis",
-          "source": "reproduction__reproduction_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "theory_synthesis",
-          "source": "theorizer__theory_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "verification_synthesis",
-          "source": "verification_synthesis",
-          "target": "final_synthesis"
-        },
-        {
-          "external": false,
-          "input": "gap_synthesis",
-          "source": "gap_synthesis",
-          "target": "final_synthesis"
-        }
-      ],
-      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
-      "nodes": [
-        {
-          "id": "data_provenance",
-          "kind": "embed",
-          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
-          "name": "data_provenance",
-          "parent": null,
-          "replan": false,
-          "workflow": "data_provenance"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "data_provenance__provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "data_provenance__provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_provenance__data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "data_provenance__provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": "data_provenance",
-          "replan": false,
-          "task": "provenance_synthesis"
-        },
-        {
-          "id": "reproduction",
-          "kind": "embed",
-          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
-          "name": "reproduction",
-          "parent": null,
-          "replan": false,
-          "workflow": "reproduction"
-        },
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "reproduction__data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "reproduction__evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "reproduction__replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": "reproduction",
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "reproduction__replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "reproduction__replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "reproduction__replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction__reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": "reproduction",
-          "replan": false,
-          "task": "reproduction_synthesis"
-        },
-        {
-          "id": "theorizer",
-          "kind": "embed",
-          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
-          "name": "theorizer",
-          "parent": null,
-          "replan": false,
-          "workflow": "theorizer"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "theorizer__evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theorizer__theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": "theorizer",
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theorizer__theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theorizer__theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "theorizer__novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theorizer__theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": "theorizer",
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "verification",
-          "kind": "group",
-          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
-          "name": "verification",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__analysis",
-          "inputs": [
-            "testability_triage",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "verification",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "verification__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "verification",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "verification__adjudicate",
-          "inputs": [
-            "testability_triage",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
-          "name": "adjudicate",
-          "parent": "verification",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "verification_synthesis",
-          "inputs": [
-            "verification",
-            "novelty_assessment"
-          ],
-          "kind": "step",
-          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
-          "name": "verification_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "verification_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "gap_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
-          "name": "gap_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "gap_synthesis"
-        },
-        {
-          "chain": [],
-          "id": "final_synthesis",
-          "inputs": [
-            "provenance_synthesis",
-            "reproduction_synthesis",
-            "theory_synthesis",
-            "verification_synthesis",
-            "gap_synthesis"
-          ],
-          "kind": "step",
-          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
-          "name": "final_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "final_synthesis"
-        }
-      ]
-    },
-    "data_provenance": {
-      "edges": [
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_extraction"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "provenance_search",
-          "source": "provenance_search",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "provenance_extraction",
-          "source": "provenance_extraction",
-          "target": "provenance_synthesis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "data_acquisition",
-          "target": "provenance_synthesis"
-        }
-      ],
-      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "provenance_search",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
-          "name": "provenance_search",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_search"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "provenance_extraction",
-          "inputs": [
-            "provenance_search"
-          ],
-          "kind": "step",
-          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
-          "name": "provenance_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_extraction"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "data_acquisition",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction"
-          ],
-          "kind": "step",
-          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
-          "name": "data_acquisition",
-          "parent": null,
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [],
-          "id": "provenance_synthesis",
-          "inputs": [
-            "provenance_search",
-            "provenance_extraction",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
-          "name": "provenance_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "provenance_synthesis"
-        }
-      ]
-    },
-    "hypothesis_driven_research": {
-      "edges": [
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "hypothesis_formation"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "literature_review",
-          "source": "literature_review",
-          "target": "testing__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__data_acquisition"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "data_acquisition",
-          "source": "testing__data_acquisition",
-          "target": "testing__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "testing__experiment_design",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "testing__analysis",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "testing__audit",
-          "target": "testing__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "hypothesis_formation",
-          "source": "hypothesis_formation",
-          "target": "hypothesis_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testing",
-          "source": "testing",
-          "target": "hypothesis_synthesis"
-        }
-      ],
-      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
-      "nodes": [
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search"
-          ],
-          "id": "literature_review",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
-          "name": "literature_review",
-          "parent": null,
-          "replan": false,
-          "task": "literature_review"
-        },
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "hypothesis_formation",
-          "inputs": [
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
-          "name": "hypothesis_formation",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_formation"
-        },
-        {
-          "id": "testing",
-          "kind": "group",
-          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
-          "name": "testing",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "testing__experiment_design",
-          "inputs": [
-            "hypothesis_formation",
-            "literature_review"
-          ],
-          "kind": "step",
-          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "testing",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "testing__data_acquisition",
-          "inputs": [
-            "experiment_design"
-          ],
-          "kind": "step",
-          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
-          "name": "data_acquisition",
-          "parent": "testing",
-          "replan": false,
-          "task": "data_acquisition"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__analysis",
-          "inputs": [
-            "experiment_design",
-            "data_acquisition"
-          ],
-          "kind": "step",
-          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "testing",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "testing__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "testing",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "testing__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
-          "name": "adjudicate",
-          "parent": "testing",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "hypothesis_synthesis",
-          "inputs": [
-            "hypothesis_formation",
-            "testing"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
-          "name": "hypothesis_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "hypothesis_synthesis"
-        }
-      ]
-    },
-    "reproduction": {
-      "edges": [
-        {
-          "external": false,
-          "input": "data_driven_discovery",
-          "source": "data_driven_discovery",
-          "target": "law_extraction"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "evidence_gathering"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__experiment_design"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "evidence_gathering",
-          "source": "evidence_gathering",
-          "target": "replication__analysis"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__audit"
-        },
-        {
-          "external": false,
-          "input": "experiment_design",
-          "source": "replication__experiment_design",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "analysis",
-          "source": "replication__analysis",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "audit",
-          "source": "replication__audit",
-          "target": "replication__adjudicate"
-        },
-        {
-          "external": false,
-          "input": "law_extraction",
-          "source": "law_extraction",
-          "target": "reproduction_synthesis"
-        },
-        {
-          "external": false,
-          "input": "replication",
-          "source": "replication",
-          "target": "reproduction_synthesis"
-        }
-      ],
-      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.",
-      "nodes": [
-        {
-          "chain": [
-            "asta autodiscovery run",
-            "asta autodiscovery experiments"
-          ],
-          "id": "data_driven_discovery",
-          "inputs": [],
-          "kind": "step",
-          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "chain": [],
-          "id": "law_extraction",
-          "inputs": [
-            "data_driven_discovery"
-          ],
-          "kind": "step",
-          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        },
-        {
-          "chain": [
-            "asta literature find",
-            "asta papers search",
-            "asta documents",
-            "asta autodiscovery upload"
-          ],
-          "id": "evidence_gathering",
-          "inputs": [
-            "law_extraction"
-          ],
-          "kind": "step",
-          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "replication",
-          "kind": "group",
-          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
-          "name": "replication",
-          "parent": null,
-          "replan": true
-        },
-        {
-          "chain": [
-            "asta experiment"
-          ],
-          "id": "replication__experiment_design",
-          "inputs": [
-            "law_extraction",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
-          "name": "experiment_design",
-          "parent": "replication",
-          "replan": false,
-          "task": "experiment_design"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__analysis",
-          "inputs": [
-            "experiment_design",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
-          "name": "analysis",
-          "parent": "replication",
-          "replan": false,
-          "task": "analysis"
-        },
-        {
-          "chain": [
-            "asta analyze-data submit",
-            "asta analyze-data poll"
-          ],
-          "id": "replication__audit",
-          "inputs": [
-            "analysis"
-          ],
-          "kind": "step",
-          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
-          "name": "audit",
-          "parent": "replication",
-          "replan": false,
-          "task": "audit"
-        },
-        {
-          "chain": [],
-          "id": "replication__adjudicate",
-          "inputs": [
-            "experiment_design",
-            "analysis",
-            "audit"
-          ],
-          "kind": "step",
-          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
-          "name": "adjudicate",
-          "parent": "replication",
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "chain": [],
-          "id": "reproduction_synthesis",
-          "inputs": [
-            "law_extraction",
-            "replication"
-          ],
-          "kind": "step",
-          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
-          "name": "reproduction_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "reproduction_synthesis"
-        }
-      ]
-    },
-    "theorizer": {
-      "edges": [
-        {
-          "external": true,
-          "input": "law_extraction",
-          "source": "ext__law_extraction",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": true,
-          "input": "adjudicate",
-          "source": "ext__adjudicate",
-          "target": "evidence_extraction"
-        },
-        {
-          "external": false,
-          "input": "evidence_extraction",
-          "source": "evidence_extraction",
-          "target": "theory_generation__theory_formation"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "data_driven_discovery",
-          "source": "ext__data_driven_discovery",
-          "target": "testability_triage"
-        },
-        {
-          "external": true,
-          "input": "evidence_gathering",
-          "source": "ext__evidence_gathering",
-          "target": "testability_triage"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "novelty_assessment"
-        },
-        {
-          "external": false,
-          "input": "theory_generation",
-          "source": "theory_generation",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "novelty_assessment",
-          "source": "novelty_assessment",
-          "target": "theory_synthesis"
-        },
-        {
-          "external": false,
-          "input": "testability_triage",
-          "source": "testability_triage",
-          "target": "theory_synthesis"
-        }
-      ],
-      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.",
-      "nodes": [
-        {
-          "chain": [
-            "asta generate-theories build-extraction-schema",
-            "asta generate-theories find-and-extract"
-          ],
-          "id": "evidence_extraction",
-          "inputs": [
-            "law_extraction",
-            "adjudicate"
-          ],
-          "kind": "step",
-          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
-          "name": "evidence_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_extraction"
-        },
-        {
-          "id": "theory_generation",
-          "kind": "group",
-          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
-          "name": "theory_generation",
-          "parent": null,
-          "replan": false
-        },
-        {
-          "chain": [
-            "asta generate-theories form-theory"
-          ],
-          "id": "theory_generation__theory_formation",
-          "inputs": [
-            "evidence_extraction"
-          ],
-          "kind": "step",
-          "mission": "Form theories from the shared extraction store under this branch's objective.",
-          "name": "theory_formation",
-          "parent": "theory_generation",
-          "replan": false,
-          "task": "theory_formation"
-        },
-        {
-          "chain": [],
-          "id": "testability_triage",
-          "inputs": [
-            "theory_generation",
-            "data_driven_discovery",
-            "evidence_gathering"
-          ],
-          "kind": "step",
-          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
-          "name": "testability_triage",
-          "parent": null,
-          "replan": false,
-          "task": "testability_triage"
-        },
-        {
-          "chain": [
-            "asta generate-theories evaluate-novelty"
-          ],
-          "id": "novelty_assessment",
-          "inputs": [
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
-          "name": "novelty_assessment",
-          "parent": null,
-          "replan": false,
-          "task": "novelty_assessment"
-        },
-        {
-          "chain": [],
-          "id": "theory_synthesis",
-          "inputs": [
-            "theory_generation",
-            "novelty_assessment",
-            "testability_triage"
-          ],
-          "kind": "step",
-          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
-          "name": "theory_synthesis",
-          "parent": null,
-          "replan": false,
-          "task": "theory_synthesis"
-        },
-        {
-          "id": "ext__adjudicate",
-          "kind": "external",
-          "mission": "",
-          "name": "adjudicate",
-          "parent": null,
-          "replan": false,
-          "task": "adjudicate"
-        },
-        {
-          "id": "ext__data_driven_discovery",
-          "kind": "external",
-          "mission": "",
-          "name": "data_driven_discovery",
-          "parent": null,
-          "replan": false,
-          "task": "data_driven_discovery"
-        },
-        {
-          "id": "ext__evidence_gathering",
-          "kind": "external",
-          "mission": "",
-          "name": "evidence_gathering",
-          "parent": null,
-          "replan": false,
-          "task": "evidence_gathering"
-        },
-        {
-          "id": "ext__law_extraction",
-          "kind": "external",
-          "mission": "",
-          "name": "law_extraction",
-          "parent": null,
-          "replan": false,
-          "task": "law_extraction"
-        }
-      ]
-    }
-  },
-  "format_version": 1,
-  "schema_version": 2,
-  "tasks": {
-    "adjudicate": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/adjudicate.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "artifacts"
-        ],
-        "title": "adjudicate",
-        "type": "object"
-      }
-    },
-    "analysis": {
-      "output": {
-        "analysis": "analysis",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "analysis": {
-            "additionalProperties": true,
-            "properties": {
-              "assumptions": {
-                "type": "string"
-              },
-              "code": {
-                "type": "string"
-              },
-              "final_answer": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "final_answer",
-              "assumptions",
-              "code"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/analysis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "analysis": {
-            "$ref": "#/$defs/analysis"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "analysis",
-          "figures",
-          "artifacts"
-        ],
-        "title": "analysis",
-        "type": "object"
-      }
-    },
-    "audit": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "audit_report": "audit_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "audit_report": {
-            "additionalProperties": true,
-            "properties": {
-              "artifacts_found": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "challenges": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "check": {
-                      "type": "string"
-                    },
-                    "concern": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "concern",
-                    "check",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "recommended_adjustment": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "verdict_survives": {
-                "type": "boolean"
-              }
-            },
-            "required": [
-              "subject_id",
-              "challenges",
-              "artifacts_found",
-              "verdict_survives",
-              "recommended_adjustment"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/audit.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "audit_report": {
-            "$ref": "#/$defs/audit_report"
-          }
-        },
-        "required": [
-          "audit_report",
-          "artifacts"
-        ],
-        "title": "audit",
-        "type": "object"
-      }
-    },
-    "cohort_assembly": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "cohort": "cohort",
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "cohort": {
-            "additionalProperties": true,
-            "properties": {
-              "discovery_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "exclusion_criteria": {
-                "type": "string"
-              },
-              "holdout_subset": {
-                "additionalProperties": true,
-                "properties": {
-                  "definition": {
-                    "type": "string"
-                  },
-                  "n": {
-                    "type": "number"
-                  },
-                  "path": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "definition",
-                  "n",
-                  "path"
-                ],
-                "type": "object"
-              },
-              "id": {
-                "type": "string"
-              },
-              "inclusion_criteria": {
-                "type": "string"
-              },
-              "research_question": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source_data_sources": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "research_question",
-              "inclusion_criteria",
-              "exclusion_criteria",
-              "sampling",
-              "source_data_sources",
-              "discovery_subset",
-              "holdout_subset",
-              "run_id"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/cohort_assembly.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "cohort": {
-            "$ref": "#/$defs/cohort"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "cohort",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "cohort_assembly",
-        "type": "object"
-      }
-    },
-    "data_acquisition": {
-      "output": {
-        "acquisitions": [
-          "acquisition"
-        ],
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "acquisition": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "validation_note": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "access_status",
-              "local_path",
-              "dataset_id",
-              "validation_note"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_acquisition.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "acquisitions": {
-            "items": {
-              "$ref": "#/$defs/acquisition"
-            },
-            "type": "array"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "acquisitions",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_acquisition",
-        "type": "object"
-      }
-    },
-    "data_driven_discovery": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/data_driven_discovery.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "datasets",
-          "artifacts"
-        ],
-        "title": "data_driven_discovery",
-        "type": "object"
-      }
-    },
-    "discovery_run": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ],
-        "experiments": [
-          "experiment"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "experiment": {
-            "additionalProperties": true,
-            "properties": {
-              "analysis": {
-                "type": "string"
-              },
-              "experiment_id": {
-                "type": "string"
-              },
-              "hypothesis": {
-                "type": "string"
-              },
-              "status": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "experiment_id",
-              "status",
-              "hypothesis",
-              "analysis"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_run.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          },
-          "experiments": {
-            "items": {
-              "$ref": "#/$defs/experiment"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "experiments",
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "discovery_run",
-        "type": "object"
-      }
-    },
-    "discovery_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "discovery_report": "discovery_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "discovery_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "interpretation": {
-                "type": "string"
-              },
-              "laws": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "deciding_experiment": {
-                      "type": "string"
-                    },
-                    "effect_size_discovery": {
-                      "type": "string"
-                    },
-                    "effect_size_holdout": {
-                      "type": "string"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "surprise": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "surprise",
-                    "outcome",
-                    "deciding_experiment",
-                    "effect_size_discovery",
-                    "effect_size_holdout"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "run_id": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "run_id",
-              "laws",
-              "interpretation",
-              "next_steps",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/discovery_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "discovery_report": {
-            "$ref": "#/$defs/discovery_report"
-          }
-        },
-        "required": [
-          "discovery_report",
-          "artifacts"
-        ],
-        "title": "discovery_synthesis",
-        "type": "object"
-      }
-    },
-    "evidence_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "artifacts"
-        ],
-        "title": "evidence_extraction",
-        "type": "object"
-      }
-    },
-    "evidence_gathering": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "datasets": [
-          "dataset"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "dataset": {
-            "additionalProperties": true,
-            "properties": {
-              "covers_laws": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "definition": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "n": {
-                "type": "number"
-              },
-              "sampling": {
-                "type": "string"
-              },
-              "source": {
-                "type": "string"
-              },
-              "variables": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "id",
-              "definition",
-              "source",
-              "n",
-              "sampling",
-              "variables",
-              "covers_laws"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/evidence_gathering.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "datasets": {
-            "items": {
-              "$ref": "#/$defs/dataset"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "datasets",
-          "artifacts"
-        ],
-        "title": "evidence_gathering",
-        "type": "object"
-      }
-    },
-    "experiment_design": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "experiment_design": "experiment_design"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "experiment_design": {
-            "additionalProperties": true,
-            "properties": {
-              "construct_equivalence": {
-                "enum": [
-                  "equivalent",
-                  "proxy",
-                  "mismatch"
-                ]
-              },
-              "data_gap": {
-                "type": "string"
-              },
-              "experiment_design_query": {
-                "type": "string"
-              },
-              "experiment_name": {
-                "type": "string"
-              },
-              "feasibility": {
-                "enum": [
-                  "feasible",
-                  "proxy_only",
-                  "data_unavailable",
-                  "construct_mismatch"
-                ]
-              },
-              "independent_operationalization": {
-                "type": "string"
-              },
-              "plain_language_description": {
-                "type": "string"
-              },
-              "prespecified": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "experiment_name",
-              "plain_language_description",
-              "source_operationalization",
-              "independent_operationalization",
-              "construct_equivalence",
-              "feasibility",
-              "required_data",
-              "data_gap",
-              "experiment_design_query",
-              "prespecified"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/experiment_design.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "experiment_design": {
-            "$ref": "#/$defs/experiment_design"
-          }
-        },
-        "required": [
-          "experiment_design",
-          "artifacts"
-        ],
-        "title": "experiment_design",
-        "type": "object"
-      }
-    },
-    "final_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "research_report": "research_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "research_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "inference_chain": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "chain": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "claim": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "claim",
-                    "chain"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sub_reports": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "kind": {
-                      "type": "string"
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "report_path": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "kind",
-                    "report_path",
-                    "one_line"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "tensions_and_surprises": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "observation": {
-                      "type": "string"
-                    },
-                    "where": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "observation",
-                    "where",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_highlights": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "claim": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_was_done": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theory_highlights",
-              "inference_chain",
-              "what_was_done",
-              "sub_reports",
-              "tensions_and_surprises",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/final_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "research_report": {
-            "$ref": "#/$defs/research_report"
-          }
-        },
-        "required": [
-          "research_report",
-          "artifacts"
-        ],
-        "title": "final_synthesis",
-        "type": "object"
-      }
-    },
-    "gap_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_gaps_report": "data_gaps_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_gaps_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "arose_in": {
-                      "type": "string"
-                    },
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity",
-                    "arose_in"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "next_steps": {
-                "items": {
-                  "$ref": "#/$defs/next_run_proposal"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "gaps",
-              "next_steps",
-              "figures",
-              "links"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "next_run_proposal": {
-            "additionalProperties": true,
-            "properties": {
-              "data_needed": {
-                "type": "string"
-              },
-              "expected_signature": {
-                "type": "string"
-              },
-              "kind": {
-                "type": "string"
-              },
-              "priority": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              },
-              "tests": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "kind",
-              "title",
-              "tests",
-              "data_needed",
-              "expected_signature",
-              "priority"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/gap_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_gaps_report": {
-            "$ref": "#/$defs/data_gaps_report"
-          }
-        },
-        "required": [
-          "data_gaps_report",
-          "artifacts"
-        ],
-        "title": "gap_synthesis",
-        "type": "object"
-      }
-    },
-    "holdout_replication": {
-      "output": {
-        "adjudication": "adjudication",
-        "artifacts": [
-          "artifact"
-        ],
-        "figures": [
-          "figure"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "adjudication": {
-            "additionalProperties": true,
-            "properties": {
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "prespecified_check": {
-                "type": "string"
-              },
-              "subject_id": {
-                "type": "string"
-              },
-              "subject_kind": {
-                "enum": [
-                  "empirical_law",
-                  "theory",
-                  "hypothesis"
-                ]
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "subject_kind",
-              "subject_id",
-              "outcome",
-              "testability",
-              "effect_size_observed",
-              "prespecified_check",
-              "independence_axes",
-              "data_used",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/holdout_replication.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "adjudication": {
-            "$ref": "#/$defs/adjudication"
-          },
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "figures": {
-            "items": {
-              "$ref": "#/$defs/figure"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "adjudication",
-          "figures",
-          "artifacts"
-        ],
-        "title": "holdout_replication",
-        "type": "object"
-      }
-    },
-    "hypothesis_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypotheses": [
-          "hypothesis"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "hypothesis": {
-            "additionalProperties": true,
-            "properties": {
-              "falsifiable_prediction": {
-                "type": "string"
-              },
-              "grounds": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "rationale": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "rationale",
-              "falsifiable_prediction",
-              "grounds"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypotheses": {
-            "items": {
-              "$ref": "#/$defs/hypothesis"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "hypotheses",
-          "artifacts"
-        ],
-        "title": "hypothesis_formation",
-        "type": "object"
-      }
-    },
-    "hypothesis_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "hypothesis_report": "hypothesis_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "hypothesis_report": {
-            "additionalProperties": true,
-            "properties": {
-              "answer": {
-                "type": "string"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "hypothesis_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "hypothesis_id",
-                    "statement",
-                    "outcome",
-                    "effect_size_observed",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_questions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "question": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "question",
-              "ledger",
-              "answer",
-              "open_questions",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "hypothesis_report": {
-            "$ref": "#/$defs/hypothesis_report"
-          }
-        },
-        "required": [
-          "hypothesis_report",
-          "artifacts"
-        ],
-        "title": "hypothesis_synthesis",
-        "type": "object"
-      }
-    },
-    "law_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "empirical_laws": [
-          "empirical_law"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "empirical_law": {
-            "additionalProperties": true,
-            "properties": {
-              "construct": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "grouping_rationale": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "mcts_provenance": {
-                "additionalProperties": true,
-                "properties": {
-                  "is_surprising": {
-                    "type": "boolean"
-                  },
-                  "posterior_belief": {
-                    "type": "object"
-                  },
-                  "prior_belief": {
-                    "type": "object"
-                  },
-                  "surprise": {
-                    "type": "number"
-                  }
-                },
-                "required": [
-                  "surprise",
-                  "is_surprising",
-                  "prior_belief",
-                  "posterior_belief"
-                ],
-                "type": "object"
-              },
-              "source_node": {
-                "type": "string"
-              },
-              "source_operationalization": {
-                "type": "string"
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "statement",
-              "construct",
-              "source_operationalization",
-              "source_node",
-              "effect_size_source",
-              "grouping_rationale"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/law_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "empirical_laws": {
-            "items": {
-              "$ref": "#/$defs/empirical_law"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "empirical_laws",
-          "artifacts"
-        ],
-        "title": "law_extraction",
-        "type": "object"
-      }
-    },
-    "literature_review": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "literature_review": "literature_review"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "literature_review": {
-            "additionalProperties": true,
-            "properties": {
-              "citations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "corpus_id": {
-                      "type": "number"
-                    },
-                    "id": {
-                      "type": "string"
-                    },
-                    "relevance": {
-                      "type": "string"
-                    },
-                    "title": {
-                      "type": "string"
-                    },
-                    "url": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "id",
-                    "corpus_id",
-                    "title",
-                    "url",
-                    "relevance"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "key_findings": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "text": {
-                      "type": "string"
-                    },
-                    "uuids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    }
-                  },
-                  "required": [
-                    "text",
-                    "uuids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "open_gaps": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "summary": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "summary",
-              "key_findings",
-              "open_gaps",
-              "citations"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/literature_review.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "literature_review": {
-            "$ref": "#/$defs/literature_review"
-          }
-        },
-        "required": [
-          "literature_review",
-          "artifacts"
-        ],
-        "title": "literature_review",
-        "type": "object"
-      }
-    },
-    "novelty_assessment": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_evaluations": [
-          "theory_evaluation"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_evaluation": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "overall_support": {
-                "enum": [
-                  "supports",
-                  "mixed",
-                  "contradicts",
-                  "inconclusive"
-                ]
-              },
-              "overall_support_raw": {
-                "type": "string"
-              },
-              "statement_evaluations": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "explanation": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "statement_index": {
-                      "type": "number"
-                    }
-                  },
-                  "required": [
-                    "statement_index",
-                    "novelty",
-                    "explanation"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "theory_id",
-              "novelty",
-              "overall_support",
-              "explanation",
-              "statement_evaluations"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/novelty_assessment.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_evaluations": {
-            "items": {
-              "$ref": "#/$defs/theory_evaluation"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theory_evaluations",
-          "artifacts"
-        ],
-        "title": "novelty_assessment",
-        "type": "object"
-      }
-    },
-    "provenance_extraction": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "extracted_data": "extracted_data",
-        "source_access": [
-          "source_access"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "extracted_data": {
-            "additionalProperties": true,
-            "properties": {
-              "extraction_schema_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "rows": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "brief_description": {
-                      "type": "string"
-                    },
-                    "citation_title": {
-                      "type": "string"
-                    },
-                    "name_full": {
-                      "type": "string"
-                    },
-                    "name_short": {
-                      "type": "string"
-                    },
-                    "uuid": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "name_short",
-                    "name_full",
-                    "brief_description",
-                    "citation_title",
-                    "uuid"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "run_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "run_id",
-              "paper_id",
-              "extraction_schema_id",
-              "rows"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "source_access": {
-            "additionalProperties": true,
-            "properties": {
-              "data_availability": {
-                "type": "string"
-              },
-              "data_source_id": {
-                "type": "string"
-              },
-              "identifier": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "data_source_id",
-              "data_availability",
-              "repository",
-              "identifier"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_extraction.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "extracted_data": {
-            "$ref": "#/$defs/extracted_data"
-          },
-          "source_access": {
-            "items": {
-              "$ref": "#/$defs/source_access"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "extracted_data",
-          "source_access",
-          "artifacts"
-        ],
-        "title": "provenance_extraction",
-        "type": "object"
-      }
-    },
-    "provenance_search": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "data_sources": [
-          "data_source"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "data_source": {
-            "additionalProperties": true,
-            "properties": {
-              "dataset_id": {
-                "type": "string"
-              },
-              "id": {
-                "type": "string"
-              },
-              "paper_id": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "dataset_id",
-              "paper_id",
-              "paper_title",
-              "paper_url"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_search.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "data_sources": {
-            "items": {
-              "$ref": "#/$defs/data_source"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "data_sources",
-          "artifacts"
-        ],
-        "title": "provenance_search",
-        "type": "object"
-      }
-    },
-    "provenance_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "provenance_report": "provenance_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "provenance_report": {
-            "additionalProperties": true,
-            "properties": {
-              "acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "not_acquired": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "sources": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "access_status": {
-                      "enum": [
-                        "acquired",
-                        "open_unfetched",
-                        "restricted",
-                        "not_found"
-                      ]
-                    },
-                    "dataset_id": {
-                      "type": "string"
-                    },
-                    "local_path": {
-                      "type": "string"
-                    },
-                    "paper_title": {
-                      "type": "string"
-                    },
-                    "paper_url": {
-                      "type": "string"
-                    },
-                    "repository": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "dataset_id",
-                    "paper_title",
-                    "paper_url",
-                    "repository",
-                    "access_status",
-                    "local_path"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "sources",
-              "method_note",
-              "acquired",
-              "not_acquired",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/provenance_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "provenance_report": {
-            "$ref": "#/$defs/provenance_report"
-          }
-        },
-        "required": [
-          "provenance_report",
-          "artifacts"
-        ],
-        "title": "provenance_synthesis",
-        "type": "object"
-      }
-    },
-    "reproduction_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "reproduction_report": "reproduction_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "reproduction_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "laws_ledger": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "effect_size_observed": {
-                      "type": "string"
-                    },
-                    "effect_size_source": {
-                      "type": "string"
-                    },
-                    "evidence": {
-                      "type": "string"
-                    },
-                    "independence_axes": {
-                      "items": {
-                        "enum": [
-                          "region",
-                          "instrument",
-                          "method",
-                          "construct",
-                          "temporal",
-                          "population"
-                        ]
-                      },
-                      "type": "array"
-                    },
-                    "law_id": {
-                      "type": "string"
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "statement": {
-                      "type": "string"
-                    },
-                    "testability": {
-                      "enum": [
-                        "tested",
-                        "proxy_only",
-                        "untestable"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "law_id",
-                    "statement",
-                    "outcome",
-                    "testability",
-                    "effect_size_source",
-                    "effect_size_observed",
-                    "independence_axes",
-                    "evidence"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "method_note": {
-                "type": "string"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_failed_or_untestable": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_held": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "method_note",
-              "laws_ledger",
-              "what_held",
-              "what_failed_or_untestable",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/reproduction_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "reproduction_report": {
-            "$ref": "#/$defs/reproduction_report"
-          }
-        },
-        "required": [
-          "reproduction_report",
-          "artifacts"
-        ],
-        "title": "reproduction_synthesis",
-        "type": "object"
-      }
-    },
-    "testability_triage": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "testability_triage": "testability_triage"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "testability_triage": {
-            "additionalProperties": true,
-            "properties": {
-              "assessments": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "available_data": {
-                      "type": "string"
-                    },
-                    "gap": {
-                      "type": "string"
-                    },
-                    "proposed_test": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "metric": {
-                          "type": "string"
-                        },
-                        "success_threshold": {
-                          "type": "string"
-                        },
-                        "test": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "test",
-                        "metric",
-                        "success_threshold"
-                      ],
-                      "type": "object"
-                    },
-                    "required_data": {
-                      "type": "string"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "testable_now",
-                    "available_data",
-                    "required_data",
-                    "proposed_test",
-                    "gap"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "testable_theory_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "assessments",
-              "testable_theory_ids"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/testability_triage.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "testability_triage": {
-            "$ref": "#/$defs/testability_triage"
-          }
-        },
-        "required": [
-          "testability_triage",
-          "artifacts"
-        ],
-        "title": "testability_triage",
-        "type": "object"
-      }
-    },
-    "theory_formation": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theories": [
-          "theory"
-        ]
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory": {
-            "additionalProperties": true,
-            "properties": {
-              "components": {
-                "additionalProperties": true,
-                "properties": {
-                  "generation_objective": {
-                    "type": "string"
-                  },
-                  "new_predictions_likely": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "new_predictions_unknown": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statements": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "conflicting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "statement_name": {
-                          "type": "string"
-                        },
-                        "supporting_evidence": {
-                          "items": {
-                            "additionalProperties": true,
-                            "properties": {
-                              "text": {
-                                "type": "string"
-                              },
-                              "uuids": {
-                                "items": {
-                                  "type": "string"
-                                },
-                                "type": "array"
-                              }
-                            },
-                            "required": [
-                              "text",
-                              "uuids"
-                            ],
-                            "type": "object"
-                          },
-                          "type": "array"
-                        },
-                        "theory_statement": {
-                          "type": "string"
-                        }
-                      },
-                      "required": [
-                        "statement_name",
-                        "theory_statement",
-                        "supporting_evidence",
-                        "conflicting_evidence"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "unaccounted_for": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "generation_objective",
-                  "theory_statements",
-                  "new_predictions_likely",
-                  "new_predictions_unknown",
-                  "unaccounted_for"
-                ],
-                "type": "object"
-              },
-              "description": {
-                "type": "string"
-              },
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "id": {
-                "type": "string"
-              },
-              "name": {
-                "type": "string"
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "theory_query": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "name",
-              "description",
-              "theory_query",
-              "objective",
-              "grounds_law_ids",
-              "supporting_evidence_ids",
-              "components"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_formation.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theories": {
-            "items": {
-              "$ref": "#/$defs/theory"
-            },
-            "type": "array"
-          }
-        },
-        "required": [
-          "theories",
-          "artifacts"
-        ],
-        "title": "theory_formation",
-        "type": "object"
-      }
-    },
-    "theory_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "theory_report": "theory_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "theory_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "mechanism": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "grounded_in": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  },
-                  "statement": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "statement",
-                  "grounded_in",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "new_predictions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "novelty_summary": {
-                "type": "string"
-              },
-              "open_threads": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "theories": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "grounds_law_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "name": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "objective": {
-                      "enum": [
-                        "accuracy_focused",
-                        "novelty_focused"
-                      ]
-                    },
-                    "one_line": {
-                      "type": "string"
-                    },
-                    "supporting_evidence_ids": {
-                      "items": {
-                        "type": "string"
-                      },
-                      "type": "array"
-                    },
-                    "testable_now": {
-                      "type": "boolean"
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "name",
-                    "objective",
-                    "one_line",
-                    "grounds_law_ids",
-                    "novelty",
-                    "testable_now",
-                    "supporting_evidence_ids"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "title": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "mechanism",
-              "theories",
-              "novelty_summary",
-              "new_predictions",
-              "open_threads",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/theory_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "theory_report": {
-            "$ref": "#/$defs/theory_report"
-          }
-        },
-        "required": [
-          "theory_report",
-          "artifacts"
-        ],
-        "title": "theory_synthesis",
-        "type": "object"
-      }
-    },
-    "verification_synthesis": {
-      "output": {
-        "artifacts": [
-          "artifact"
-        ],
-        "verification_report": "verification_report"
-      },
-      "schema": {
-        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-        "$defs": {
-          "artifact": {
-            "additionalProperties": true,
-            "properties": {
-              "artifactId": {
-                "type": "string"
-              },
-              "description": {
-                "type": "string"
-              },
-              "extensions": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "metadata": {
-                "type": "object"
-              },
-              "name": {
-                "type": "string"
-              },
-              "parts": {
-                "items": {
-                  "$ref": "#/$defs/part"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "artifactId",
-              "name",
-              "description",
-              "parts"
-            ],
-            "type": "object"
-          },
-          "figure": {
-            "additionalProperties": true,
-            "properties": {
-              "caption": {
-                "type": "string"
-              },
-              "image": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "caption",
-              "image"
-            ],
-            "type": "object"
-          },
-          "part": {
-            "additionalProperties": true,
-            "properties": {
-              "kind": {
-                "type": "string"
-              },
-              "metadata": {
-                "type": "object"
-              }
-            },
-            "required": [
-              "kind"
-            ],
-            "type": "object"
-          },
-          "verification_report": {
-            "additionalProperties": true,
-            "properties": {
-              "figures": {
-                "items": {
-                  "$ref": "#/$defs/figure"
-                },
-                "type": "array"
-              },
-              "gaps": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "blocks": {
-                      "type": "string"
-                    },
-                    "item": {
-                      "type": "string"
-                    },
-                    "missing_data": {
-                      "type": "string"
-                    },
-                    "severity": {
-                      "enum": [
-                        "high",
-                        "medium",
-                        "low"
-                      ]
-                    }
-                  },
-                  "required": [
-                    "item",
-                    "missing_data",
-                    "blocks",
-                    "severity"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "headline": {
-                "type": "string"
-              },
-              "links": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "label": {
-                      "type": "string"
-                    },
-                    "ref": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "label",
-                    "ref"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "novelty_by_verification": {
-                "items": {
-                  "additionalProperties": true,
-                  "properties": {
-                    "audit_survived": {
-                      "type": "boolean"
-                    },
-                    "claim": {
-                      "type": "string"
-                    },
-                    "data_used": {
-                      "type": "string"
-                    },
-                    "effect_size": {
-                      "type": "string"
-                    },
-                    "novelty": {
-                      "enum": [
-                        "established",
-                        "derivable",
-                        "genuinely_new"
-                      ]
-                    },
-                    "outcome": {
-                      "enum": [
-                        "held",
-                        "partial",
-                        "failed",
-                        "underpowered",
-                        "n/a"
-                      ]
-                    },
-                    "theory_id": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "theory_id",
-                    "claim",
-                    "novelty",
-                    "outcome",
-                    "effect_size",
-                    "data_used",
-                    "audit_survived"
-                  ],
-                  "type": "object"
-                },
-                "type": "array"
-              },
-              "report_path": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "what_could_not_be_tested": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "what_was_tested": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "report_path",
-              "title",
-              "headline",
-              "novelty_by_verification",
-              "what_was_tested",
-              "what_could_not_be_tested",
-              "figures",
-              "gaps",
-              "links"
-            ],
-            "type": "object"
-          }
-        },
-        "$id": "asta-research-step/verification_synthesis.schema.json",
-        "$schema": "https://json-schema.org/draft/2020-12/schema",
-        "additionalProperties": false,
-        "properties": {
-          "artifacts": {
-            "items": {
-              "$ref": "#/$defs/artifact"
-            },
-            "type": "array"
-          },
-          "verification_report": {
-            "$ref": "#/$defs/verification_report"
-          }
-        },
-        "required": [
-          "verification_report",
-          "artifacts"
-        ],
-        "title": "verification_synthesis",
-        "type": "object"
-      }
-    }
-  }
-}
diff --git a/skills/research-step/assets/compiled/gap_synthesis.schema.json b/skills/research-step/assets/compiled/gap_synthesis.schema.json
deleted file mode 100644
index 760fbb5..0000000
--- a/skills/research-step/assets/compiled/gap_synthesis.schema.json
+++ /dev/null
@@ -1,221 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_gaps_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "arose_in": {
-                "type": "string"
-              },
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity",
-              "arose_in"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "next_steps": {
-          "items": {
-            "$ref": "#/$defs/next_run_proposal"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "gaps",
-        "next_steps",
-        "figures",
-        "links"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "next_run_proposal": {
-      "additionalProperties": true,
-      "properties": {
-        "data_needed": {
-          "type": "string"
-        },
-        "expected_signature": {
-          "type": "string"
-        },
-        "kind": {
-          "type": "string"
-        },
-        "priority": {
-          "enum": [
-            "high",
-            "medium",
-            "low"
-          ]
-        },
-        "tests": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "kind",
-        "title",
-        "tests",
-        "data_needed",
-        "expected_signature",
-        "priority"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/gap_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_gaps_report": {
-      "$ref": "#/$defs/data_gaps_report"
-    }
-  },
-  "required": [
-    "data_gaps_report",
-    "artifacts"
-  ],
-  "title": "gap_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/holdout_replication.schema.json b/skills/research-step/assets/compiled/holdout_replication.schema.json
deleted file mode 100644
index 9d18252..0000000
--- a/skills/research-step/assets/compiled/holdout_replication.schema.json
+++ /dev/null
@@ -1,167 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "adjudication": {
-      "additionalProperties": true,
-      "properties": {
-        "data_used": {
-          "type": "string"
-        },
-        "effect_size_observed": {
-          "type": "string"
-        },
-        "evidence": {
-          "type": "string"
-        },
-        "independence_axes": {
-          "items": {
-            "enum": [
-              "region",
-              "instrument",
-              "method",
-              "construct",
-              "temporal",
-              "population"
-            ]
-          },
-          "type": "array"
-        },
-        "outcome": {
-          "enum": [
-            "held",
-            "partial",
-            "failed",
-            "underpowered",
-            "n/a"
-          ]
-        },
-        "prespecified_check": {
-          "type": "string"
-        },
-        "subject_id": {
-          "type": "string"
-        },
-        "subject_kind": {
-          "enum": [
-            "empirical_law",
-            "theory",
-            "hypothesis"
-          ]
-        },
-        "testability": {
-          "enum": [
-            "tested",
-            "proxy_only",
-            "untestable"
-          ]
-        }
-      },
-      "required": [
-        "subject_kind",
-        "subject_id",
-        "outcome",
-        "testability",
-        "effect_size_observed",
-        "prespecified_check",
-        "independence_axes",
-        "data_used",
-        "evidence"
-      ],
-      "type": "object"
-    },
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/holdout_replication.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "adjudication": {
-      "$ref": "#/$defs/adjudication"
-    },
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "figures": {
-      "items": {
-        "$ref": "#/$defs/figure"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "adjudication",
-    "figures",
-    "artifacts"
-  ],
-  "title": "holdout_replication",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/hypothesis_driven_research.mmd b/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
deleted file mode 100644
index e996ef7..0000000
--- a/skills/research-step/assets/compiled/hypothesis_driven_research.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% hypothesis_driven_research — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  literature_review["literature_review<br/>asta literature find · asta papers search"]
-  hypothesis_formation["hypothesis_formation<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph testing["testing (at replan)"]
-    testing__experiment_design["experiment_design<br/>asta experiment"]
-    testing__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
-    testing__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    testing__adjudicate["adjudicate"]
-  end
-  class testing replan
-  hypothesis_synthesis["hypothesis_synthesis"]
-  literature_review --> hypothesis_formation
-  hypothesis_formation --> testing__experiment_design
-  literature_review --> testing__experiment_design
-  testing__experiment_design --> testing__data_acquisition
-  testing__experiment_design --> testing__analysis
-  testing__data_acquisition --> testing__analysis
-  testing__analysis --> testing__audit
-  testing__experiment_design --> testing__adjudicate
-  testing__analysis --> testing__adjudicate
-  testing__audit --> testing__adjudicate
-  hypothesis_formation --> hypothesis_synthesis
-  testing --> hypothesis_synthesis
diff --git a/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/skills/research-step/assets/compiled/hypothesis_formation.schema.json
deleted file mode 100644
index 694d94f..0000000
--- a/skills/research-step/assets/compiled/hypothesis_formation.schema.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "hypothesis": {
-      "additionalProperties": true,
-      "properties": {
-        "falsifiable_prediction": {
-          "type": "string"
-        },
-        "grounds": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "rationale": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "rationale",
-        "falsifiable_prediction",
-        "grounds"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypotheses": {
-      "items": {
-        "$ref": "#/$defs/hypothesis"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "hypotheses",
-    "artifacts"
-  ],
-  "title": "hypothesis_formation",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
deleted file mode 100644
index b2fe767..0000000
--- a/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
+++ /dev/null
@@ -1,224 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "hypothesis_report": {
-      "additionalProperties": true,
-      "properties": {
-        "answer": {
-          "type": "string"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "hypothesis_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "hypothesis_id",
-              "statement",
-              "outcome",
-              "effect_size_observed",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_questions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "question": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "question",
-        "ledger",
-        "answer",
-        "open_questions",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "hypothesis_report": {
-      "$ref": "#/$defs/hypothesis_report"
-    }
-  },
-  "required": [
-    "hypothesis_report",
-    "artifacts"
-  ],
-  "title": "hypothesis_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/law_extraction.schema.json b/skills/research-step/assets/compiled/law_extraction.schema.json
deleted file mode 100644
index 7b3e1fc..0000000
--- a/skills/research-step/assets/compiled/law_extraction.schema.json
+++ /dev/null
@@ -1,139 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "empirical_law": {
-      "additionalProperties": true,
-      "properties": {
-        "construct": {
-          "type": "string"
-        },
-        "effect_size_source": {
-          "type": "string"
-        },
-        "grouping_rationale": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "mcts_provenance": {
-          "additionalProperties": true,
-          "properties": {
-            "is_surprising": {
-              "type": "boolean"
-            },
-            "posterior_belief": {
-              "type": "object"
-            },
-            "prior_belief": {
-              "type": "object"
-            },
-            "surprise": {
-              "type": "number"
-            }
-          },
-          "required": [
-            "surprise",
-            "is_surprising",
-            "prior_belief",
-            "posterior_belief"
-          ],
-          "type": "object"
-        },
-        "source_node": {
-          "type": "string"
-        },
-        "source_operationalization": {
-          "type": "string"
-        },
-        "statement": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "statement",
-        "construct",
-        "source_operationalization",
-        "source_node",
-        "effect_size_source",
-        "grouping_rationale"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/law_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "empirical_laws": {
-      "items": {
-        "$ref": "#/$defs/empirical_law"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "empirical_laws",
-    "artifacts"
-  ],
-  "title": "law_extraction",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/literature_review.schema.json b/skills/research-step/assets/compiled/literature_review.schema.json
deleted file mode 100644
index 14df7b7..0000000
--- a/skills/research-step/assets/compiled/literature_review.schema.json
+++ /dev/null
@@ -1,150 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "literature_review": {
-      "additionalProperties": true,
-      "properties": {
-        "citations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "corpus_id": {
-                "type": "number"
-              },
-              "id": {
-                "type": "string"
-              },
-              "relevance": {
-                "type": "string"
-              },
-              "title": {
-                "type": "string"
-              },
-              "url": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "id",
-              "corpus_id",
-              "title",
-              "url",
-              "relevance"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "key_findings": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "text": {
-                "type": "string"
-              },
-              "uuids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              }
-            },
-            "required": [
-              "text",
-              "uuids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "open_gaps": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "summary": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "summary",
-        "key_findings",
-        "open_gaps",
-        "citations"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/literature_review.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "literature_review": {
-      "$ref": "#/$defs/literature_review"
-    }
-  },
-  "required": [
-    "literature_review",
-    "artifacts"
-  ],
-  "title": "literature_review",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/novelty_assessment.schema.json b/skills/research-step/assets/compiled/novelty_assessment.schema.json
deleted file mode 100644
index 729f9fe..0000000
--- a/skills/research-step/assets/compiled/novelty_assessment.schema.json
+++ /dev/null
@@ -1,147 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_evaluation": {
-      "additionalProperties": true,
-      "properties": {
-        "explanation": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "novelty": {
-          "enum": [
-            "established",
-            "derivable",
-            "genuinely_new"
-          ]
-        },
-        "overall_support": {
-          "enum": [
-            "supports",
-            "mixed",
-            "contradicts",
-            "inconclusive"
-          ]
-        },
-        "overall_support_raw": {
-          "type": "string"
-        },
-        "statement_evaluations": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "explanation": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "statement_index": {
-                "type": "number"
-              }
-            },
-            "required": [
-              "statement_index",
-              "novelty",
-              "explanation"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "theory_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "theory_id",
-        "novelty",
-        "overall_support",
-        "explanation",
-        "statement_evaluations"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/novelty_assessment.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_evaluations": {
-      "items": {
-        "$ref": "#/$defs/theory_evaluation"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theory_evaluations",
-    "artifacts"
-  ],
-  "title": "novelty_assessment",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/provenance_extraction.schema.json b/skills/research-step/assets/compiled/provenance_extraction.schema.json
deleted file mode 100644
index 2bd4ea8..0000000
--- a/skills/research-step/assets/compiled/provenance_extraction.schema.json
+++ /dev/null
@@ -1,163 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "extracted_data": {
-      "additionalProperties": true,
-      "properties": {
-        "extraction_schema_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "rows": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "brief_description": {
-                "type": "string"
-              },
-              "citation_title": {
-                "type": "string"
-              },
-              "name_full": {
-                "type": "string"
-              },
-              "name_short": {
-                "type": "string"
-              },
-              "uuid": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "name_short",
-              "name_full",
-              "brief_description",
-              "citation_title",
-              "uuid"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "run_id": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "run_id",
-        "paper_id",
-        "extraction_schema_id",
-        "rows"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "source_access": {
-      "additionalProperties": true,
-      "properties": {
-        "data_availability": {
-          "type": "string"
-        },
-        "data_source_id": {
-          "type": "string"
-        },
-        "identifier": {
-          "type": "string"
-        },
-        "repository": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "data_source_id",
-        "data_availability",
-        "repository",
-        "identifier"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_extraction.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "extracted_data": {
-      "$ref": "#/$defs/extracted_data"
-    },
-    "source_access": {
-      "items": {
-        "$ref": "#/$defs/source_access"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "extracted_data",
-    "source_access",
-    "artifacts"
-  ],
-  "title": "provenance_extraction",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/provenance_search.schema.json b/skills/research-step/assets/compiled/provenance_search.schema.json
deleted file mode 100644
index 8a924d9..0000000
--- a/skills/research-step/assets/compiled/provenance_search.schema.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "data_source": {
-      "additionalProperties": true,
-      "properties": {
-        "dataset_id": {
-          "type": "string"
-        },
-        "id": {
-          "type": "string"
-        },
-        "paper_id": {
-          "type": "string"
-        },
-        "paper_title": {
-          "type": "string"
-        },
-        "paper_url": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "dataset_id",
-        "paper_id",
-        "paper_title",
-        "paper_url"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_search.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "data_sources": {
-      "items": {
-        "$ref": "#/$defs/data_source"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "data_sources",
-    "artifacts"
-  ],
-  "title": "provenance_search",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/skills/research-step/assets/compiled/provenance_synthesis.schema.json
deleted file mode 100644
index 0d43a6f..0000000
--- a/skills/research-step/assets/compiled/provenance_synthesis.schema.json
+++ /dev/null
@@ -1,230 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "provenance_report": {
-      "additionalProperties": true,
-      "properties": {
-        "acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "not_acquired": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "sources": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "access_status": {
-                "enum": [
-                  "acquired",
-                  "open_unfetched",
-                  "restricted",
-                  "not_found"
-                ]
-              },
-              "dataset_id": {
-                "type": "string"
-              },
-              "local_path": {
-                "type": "string"
-              },
-              "paper_title": {
-                "type": "string"
-              },
-              "paper_url": {
-                "type": "string"
-              },
-              "repository": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "dataset_id",
-              "paper_title",
-              "paper_url",
-              "repository",
-              "access_status",
-              "local_path"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "sources",
-        "method_note",
-        "acquired",
-        "not_acquired",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/provenance_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "provenance_report": {
-      "$ref": "#/$defs/provenance_report"
-    }
-  },
-  "required": [
-    "provenance_report",
-    "artifacts"
-  ],
-  "title": "provenance_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/reproduction.mmd b/skills/research-step/assets/compiled/reproduction.mmd
deleted file mode 100644
index 4bb9e6e..0000000
--- a/skills/research-step/assets/compiled/reproduction.mmd
+++ /dev/null
@@ -1,29 +0,0 @@
-%% reproduction — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
-  law_extraction["law_extraction"]
-  evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
-  subgraph replication["replication (at replan)"]
-    replication__experiment_design["experiment_design<br/>asta experiment"]
-    replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
-    replication__adjudicate["adjudicate"]
-  end
-  class replication replan
-  reproduction_synthesis["reproduction_synthesis"]
-  data_driven_discovery --> law_extraction
-  law_extraction --> evidence_gathering
-  law_extraction --> replication__experiment_design
-  evidence_gathering --> replication__experiment_design
-  replication__experiment_design --> replication__analysis
-  evidence_gathering --> replication__analysis
-  replication__analysis --> replication__audit
-  replication__experiment_design --> replication__adjudicate
-  replication__analysis --> replication__adjudicate
-  replication__audit --> replication__adjudicate
-  law_extraction --> reproduction_synthesis
-  replication --> reproduction_synthesis
diff --git a/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
deleted file mode 100644
index 570e076..0000000
--- a/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
+++ /dev/null
@@ -1,253 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "reproduction_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "laws_ledger": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "effect_size_observed": {
-                "type": "string"
-              },
-              "effect_size_source": {
-                "type": "string"
-              },
-              "evidence": {
-                "type": "string"
-              },
-              "independence_axes": {
-                "items": {
-                  "enum": [
-                    "region",
-                    "instrument",
-                    "method",
-                    "construct",
-                    "temporal",
-                    "population"
-                  ]
-                },
-                "type": "array"
-              },
-              "law_id": {
-                "type": "string"
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "statement": {
-                "type": "string"
-              },
-              "testability": {
-                "enum": [
-                  "tested",
-                  "proxy_only",
-                  "untestable"
-                ]
-              }
-            },
-            "required": [
-              "law_id",
-              "statement",
-              "outcome",
-              "testability",
-              "effect_size_source",
-              "effect_size_observed",
-              "independence_axes",
-              "evidence"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "method_note": {
-          "type": "string"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_failed_or_untestable": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_held": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "method_note",
-        "laws_ledger",
-        "what_held",
-        "what_failed_or_untestable",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/reproduction_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "reproduction_report": {
-      "$ref": "#/$defs/reproduction_report"
-    }
-  },
-  "required": [
-    "reproduction_report",
-    "artifacts"
-  ],
-  "title": "reproduction_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/testability_triage.schema.json b/skills/research-step/assets/compiled/testability_triage.schema.json
deleted file mode 100644
index 8968920..0000000
--- a/skills/research-step/assets/compiled/testability_triage.schema.json
+++ /dev/null
@@ -1,144 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "testability_triage": {
-      "additionalProperties": true,
-      "properties": {
-        "assessments": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "available_data": {
-                "type": "string"
-              },
-              "gap": {
-                "type": "string"
-              },
-              "proposed_test": {
-                "additionalProperties": true,
-                "properties": {
-                  "metric": {
-                    "type": "string"
-                  },
-                  "success_threshold": {
-                    "type": "string"
-                  },
-                  "test": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "test",
-                  "metric",
-                  "success_threshold"
-                ],
-                "type": "object"
-              },
-              "required_data": {
-                "type": "string"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "testable_now",
-              "available_data",
-              "required_data",
-              "proposed_test",
-              "gap"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "testable_theory_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "assessments",
-        "testable_theory_ids"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/testability_triage.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "testability_triage": {
-      "$ref": "#/$defs/testability_triage"
-    }
-  },
-  "required": [
-    "testability_triage",
-    "artifacts"
-  ],
-  "title": "testability_triage",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/theorizer.mmd b/skills/research-step/assets/compiled/theorizer.mmd
deleted file mode 100644
index 59e2d0f..0000000
--- a/skills/research-step/assets/compiled/theorizer.mmd
+++ /dev/null
@@ -1,27 +0,0 @@
-%% theorizer — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
-%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
-flowchart TD
-  classDef replan stroke-dasharray: 6 4
-  classDef embed fill:#eef6ff,stroke:#6699cc
-  classDef external stroke-dasharray: 3 3,color:#888888
-  evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
-  subgraph theory_generation["theory_generation"]
-    theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
-  end
-  testability_triage["testability_triage"]
-  novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
-  theory_synthesis["theory_synthesis"]
-  ext__adjudicate(["adjudicate (external)"]):::external
-  ext__data_driven_discovery(["data_driven_discovery (external)"]):::external
-  ext__evidence_gathering(["evidence_gathering (external)"]):::external
-  ext__law_extraction(["law_extraction (external)"]):::external
-  ext__law_extraction -.-> evidence_extraction
-  ext__adjudicate -.-> evidence_extraction
-  evidence_extraction --> theory_generation__theory_formation
-  theory_generation --> testability_triage
-  ext__data_driven_discovery -.-> testability_triage
-  ext__evidence_gathering -.-> testability_triage
-  testability_triage --> novelty_assessment
-  theory_generation --> theory_synthesis
-  novelty_assessment --> theory_synthesis
-  testability_triage --> theory_synthesis
diff --git a/skills/research-step/assets/compiled/theory_formation.schema.json b/skills/research-step/assets/compiled/theory_formation.schema.json
deleted file mode 100644
index 7373cec..0000000
--- a/skills/research-step/assets/compiled/theory_formation.schema.json
+++ /dev/null
@@ -1,240 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory": {
-      "additionalProperties": true,
-      "properties": {
-        "components": {
-          "additionalProperties": true,
-          "properties": {
-            "generation_objective": {
-              "type": "string"
-            },
-            "new_predictions_likely": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "new_predictions_unknown": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "theory_statements": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "conflicting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "statement_name": {
-                    "type": "string"
-                  },
-                  "supporting_evidence": {
-                    "items": {
-                      "additionalProperties": true,
-                      "properties": {
-                        "text": {
-                          "type": "string"
-                        },
-                        "uuids": {
-                          "items": {
-                            "type": "string"
-                          },
-                          "type": "array"
-                        }
-                      },
-                      "required": [
-                        "text",
-                        "uuids"
-                      ],
-                      "type": "object"
-                    },
-                    "type": "array"
-                  },
-                  "theory_statement": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "statement_name",
-                  "theory_statement",
-                  "supporting_evidence",
-                  "conflicting_evidence"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            },
-            "unaccounted_for": {
-              "items": {
-                "additionalProperties": true,
-                "properties": {
-                  "text": {
-                    "type": "string"
-                  },
-                  "uuids": {
-                    "items": {
-                      "type": "string"
-                    },
-                    "type": "array"
-                  }
-                },
-                "required": [
-                  "text",
-                  "uuids"
-                ],
-                "type": "object"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "generation_objective",
-            "theory_statements",
-            "new_predictions_likely",
-            "new_predictions_unknown",
-            "unaccounted_for"
-          ],
-          "type": "object"
-        },
-        "description": {
-          "type": "string"
-        },
-        "grounds_law_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        },
-        "objective": {
-          "enum": [
-            "accuracy_focused",
-            "novelty_focused"
-          ]
-        },
-        "supporting_evidence_ids": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "theory_query": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "id",
-        "name",
-        "description",
-        "theory_query",
-        "objective",
-        "grounds_law_ids",
-        "supporting_evidence_ids",
-        "components"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_formation.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theories": {
-      "items": {
-        "$ref": "#/$defs/theory"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "theories",
-    "artifacts"
-  ],
-  "title": "theory_formation",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/theory_synthesis.schema.json b/skills/research-step/assets/compiled/theory_synthesis.schema.json
deleted file mode 100644
index dd2768e..0000000
--- a/skills/research-step/assets/compiled/theory_synthesis.schema.json
+++ /dev/null
@@ -1,280 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "theory_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "mechanism": {
-          "additionalProperties": true,
-          "properties": {
-            "conflicting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "grounded_in": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            },
-            "statement": {
-              "type": "string"
-            },
-            "supporting_evidence": {
-              "items": {
-                "type": "string"
-              },
-              "type": "array"
-            }
-          },
-          "required": [
-            "statement",
-            "grounded_in",
-            "supporting_evidence",
-            "conflicting_evidence"
-          ],
-          "type": "object"
-        },
-        "new_predictions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "novelty_summary": {
-          "type": "string"
-        },
-        "open_threads": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "theories": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "grounds_law_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "name": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "objective": {
-                "enum": [
-                  "accuracy_focused",
-                  "novelty_focused"
-                ]
-              },
-              "one_line": {
-                "type": "string"
-              },
-              "supporting_evidence_ids": {
-                "items": {
-                  "type": "string"
-                },
-                "type": "array"
-              },
-              "testable_now": {
-                "type": "boolean"
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "name",
-              "objective",
-              "one_line",
-              "grounds_law_ids",
-              "novelty",
-              "testable_now",
-              "supporting_evidence_ids"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "title": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "mechanism",
-        "theories",
-        "novelty_summary",
-        "new_predictions",
-        "open_threads",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/theory_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "theory_report": {
-      "$ref": "#/$defs/theory_report"
-    }
-  },
-  "required": [
-    "theory_report",
-    "artifacts"
-  ],
-  "title": "theory_synthesis",
-  "type": "object"
-}
diff --git a/skills/research-step/assets/compiled/verification_synthesis.schema.json b/skills/research-step/assets/compiled/verification_synthesis.schema.json
deleted file mode 100644
index 8d1a639..0000000
--- a/skills/research-step/assets/compiled/verification_synthesis.schema.json
+++ /dev/null
@@ -1,232 +0,0 @@
-{
-  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
-  "$defs": {
-    "artifact": {
-      "additionalProperties": true,
-      "properties": {
-        "artifactId": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "extensions": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "metadata": {
-          "type": "object"
-        },
-        "name": {
-          "type": "string"
-        },
-        "parts": {
-          "items": {
-            "$ref": "#/$defs/part"
-          },
-          "type": "array"
-        }
-      },
-      "required": [
-        "artifactId",
-        "name",
-        "description",
-        "parts"
-      ],
-      "type": "object"
-    },
-    "figure": {
-      "additionalProperties": true,
-      "properties": {
-        "caption": {
-          "type": "string"
-        },
-        "image": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "caption",
-        "image"
-      ],
-      "type": "object"
-    },
-    "part": {
-      "additionalProperties": true,
-      "properties": {
-        "kind": {
-          "type": "string"
-        },
-        "metadata": {
-          "type": "object"
-        }
-      },
-      "required": [
-        "kind"
-      ],
-      "type": "object"
-    },
-    "verification_report": {
-      "additionalProperties": true,
-      "properties": {
-        "figures": {
-          "items": {
-            "$ref": "#/$defs/figure"
-          },
-          "type": "array"
-        },
-        "gaps": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "blocks": {
-                "type": "string"
-              },
-              "item": {
-                "type": "string"
-              },
-              "missing_data": {
-                "type": "string"
-              },
-              "severity": {
-                "enum": [
-                  "high",
-                  "medium",
-                  "low"
-                ]
-              }
-            },
-            "required": [
-              "item",
-              "missing_data",
-              "blocks",
-              "severity"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "headline": {
-          "type": "string"
-        },
-        "links": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "label": {
-                "type": "string"
-              },
-              "ref": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "label",
-              "ref"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "novelty_by_verification": {
-          "items": {
-            "additionalProperties": true,
-            "properties": {
-              "audit_survived": {
-                "type": "boolean"
-              },
-              "claim": {
-                "type": "string"
-              },
-              "data_used": {
-                "type": "string"
-              },
-              "effect_size": {
-                "type": "string"
-              },
-              "novelty": {
-                "enum": [
-                  "established",
-                  "derivable",
-                  "genuinely_new"
-                ]
-              },
-              "outcome": {
-                "enum": [
-                  "held",
-                  "partial",
-                  "failed",
-                  "underpowered",
-                  "n/a"
-                ]
-              },
-              "theory_id": {
-                "type": "string"
-              }
-            },
-            "required": [
-              "theory_id",
-              "claim",
-              "novelty",
-              "outcome",
-              "effect_size",
-              "data_used",
-              "audit_survived"
-            ],
-            "type": "object"
-          },
-          "type": "array"
-        },
-        "report_path": {
-          "type": "string"
-        },
-        "title": {
-          "type": "string"
-        },
-        "what_could_not_be_tested": {
-          "items": {
-            "type": "string"
-          },
-          "type": "array"
-        },
-        "what_was_tested": {
-          "type": "string"
-        }
-      },
-      "required": [
-        "report_path",
-        "title",
-        "headline",
-        "novelty_by_verification",
-        "what_was_tested",
-        "what_could_not_be_tested",
-        "figures",
-        "gaps",
-        "links"
-      ],
-      "type": "object"
-    }
-  },
-  "$id": "asta-research-step/verification_synthesis.schema.json",
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "additionalProperties": false,
-  "properties": {
-    "artifacts": {
-      "items": {
-        "$ref": "#/$defs/artifact"
-      },
-      "type": "array"
-    },
-    "verification_report": {
-      "$ref": "#/$defs/verification_report"
-    }
-  },
-  "required": [
-    "verification_report",
-    "artifacts"
-  ],
-  "title": "verification_synthesis",
-  "type": "object"
-}

From e596be577b37864e50db527324764bf76e62730d Mon Sep 17 00:00:00 2001
From: Charlie McGrady <charliem@allenai.org>
Date: Tue, 16 Jun 2026 09:42:51 -0700
Subject: [PATCH 6/6] research-step: add compiled output schemas, flows.json,
 high-level flow diagram, and compiler for review

---
 scripts/compile-schemas.py                    |  807 ++
 .../assets/compiled/adjudicate.schema.json    |  144 +
 .../assets/compiled/analysis.schema.json      |  119 +
 .../assets/compiled/audit.schema.json         |  127 +
 .../compiled/cohort_assembly.schema.json      |  206 +
 .../compiled/data_acquisition.schema.json     |  161 +
 ..._literature_grounded_theory_generation.mmd |   92 +
 .../data_driven_discovery.schema.json         |  152 +
 .../assets/compiled/discovery_run.schema.json |  170 +
 .../compiled/discovery_synthesis.schema.json  |  271 +
 .../compiled/evidence_extraction.schema.json  |  132 +
 .../compiled/evidence_gathering.schema.json   |  121 +
 .../compiled/experiment_design.schema.json    |  162 +
 .../compiled/final_synthesis.schema.json      |  289 +
 .../research-step/assets/compiled/flows.json  | 6657 +++++++++++++++++
 .../assets/compiled/gap_synthesis.schema.json |  221 +
 .../compiled/holdout_replication.schema.json  |  167 +
 .../compiled/hypothesis_formation.schema.json |  126 +
 .../compiled/hypothesis_synthesis.schema.json |  224 +
 .../compiled/law_extraction.schema.json       |  139 +
 .../compiled/literature_review.schema.json    |  150 +
 .../compiled/novelty_assessment.schema.json   |  147 +
 .../provenance_extraction.schema.json         |  163 +
 .../compiled/provenance_search.schema.json    |  107 +
 .../compiled/provenance_synthesis.schema.json |  230 +
 .../reproduction_synthesis.schema.json        |  253 +
 .../compiled/testability_triage.schema.json   |  144 +
 .../compiled/theory_formation.schema.json     |  240 +
 .../compiled/theory_synthesis.schema.json     |  280 +
 .../verification_synthesis.schema.json        |  232 +
 30 files changed, 12433 insertions(+)
 create mode 100644 scripts/compile-schemas.py
 create mode 100644 skills/research-step/assets/compiled/adjudicate.schema.json
 create mode 100644 skills/research-step/assets/compiled/analysis.schema.json
 create mode 100644 skills/research-step/assets/compiled/audit.schema.json
 create mode 100644 skills/research-step/assets/compiled/cohort_assembly.schema.json
 create mode 100644 skills/research-step/assets/compiled/data_acquisition.schema.json
 create mode 100644 skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
 create mode 100644 skills/research-step/assets/compiled/data_driven_discovery.schema.json
 create mode 100644 skills/research-step/assets/compiled/discovery_run.schema.json
 create mode 100644 skills/research-step/assets/compiled/discovery_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/evidence_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/evidence_gathering.schema.json
 create mode 100644 skills/research-step/assets/compiled/experiment_design.schema.json
 create mode 100644 skills/research-step/assets/compiled/final_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/flows.json
 create mode 100644 skills/research-step/assets/compiled/gap_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/holdout_replication.schema.json
 create mode 100644 skills/research-step/assets/compiled/hypothesis_formation.schema.json
 create mode 100644 skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/law_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/literature_review.schema.json
 create mode 100644 skills/research-step/assets/compiled/novelty_assessment.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_extraction.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_search.schema.json
 create mode 100644 skills/research-step/assets/compiled/provenance_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/reproduction_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/testability_triage.schema.json
 create mode 100644 skills/research-step/assets/compiled/theory_formation.schema.json
 create mode 100644 skills/research-step/assets/compiled/theory_synthesis.schema.json
 create mode 100644 skills/research-step/assets/compiled/verification_synthesis.schema.json

diff --git a/scripts/compile-schemas.py b/scripts/compile-schemas.py
new file mode 100644
index 0000000..d237028
--- /dev/null
+++ b/scripts/compile-schemas.py
@@ -0,0 +1,807 @@
+#!/usr/bin/env python3
+"""compile-schemas.py — compiler for research-step's assets/schemas.yaml.
+
+Three stages:
+  parse  — load schemas.yaml and parse the type mini-DSL and the flow trees.
+  check  — resolve every cross-reference: type fields -> types/enums/builtins,
+           task outputs -> types, flow steps -> tasks, {workflow: X} chain
+           items -> flows, and every `input:` entry -> an upstream node in the
+           flow's expanded graph (or in some flow that embeds it). Also diffs
+           scripts/task-output-keys.sh against the compiler's own answer.
+  emit   — assets/compiled/<task>.schema.json (one self-contained JSON Schema
+           per task), assets/compiled/<flow>.mmd (one fully-expanded mermaid
+           diagram per flow), and assets/compiled/flows.json (every flow as a
+           machine-readable graph — nodes, edges, chain commands, missions —
+           with the task schemas embedded, for downstream renderers).
+
+Contract notes encoded here (they mirror the comments in schemas.yaml):
+  - top-level output_json is closed (additionalProperties: false) — byproducts
+    go in artifacts;
+  - nested objects are open (additionalProperties: true) — agent payloads nest
+    verbatim, extra nested fields are always permitted;
+  - a `?` suffix on a field name marks it optional (e.g. `mcts_provenance?:`);
+    unmarked fields are required.
+
+Exit: 0 ok (warnings allowed) · 1 errors (all printed).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+
+SCHEMA_VERSION = 2  # the only top-level `version` check_document accepts
+# Shape of assets/compiled/flows.json. Bump when the JSON structure changes;
+# downstream renderers gate on it before drawing anything.
+FLOWS_FORMAT_VERSION = 1
+BUILTINS = {  # DSL scalar type name -> the JSON Schema fragment it compiles to
+    "string": {"type": "string"},
+    "number": {"type": "number"},
+    "boolean": {"type": "boolean"},
+    "object": {"type": "object"},
+}
+RESERVED_NODE_KEYS = {"mission", "input", "chain", "replan"}  # node attributes
+JSON_DIALECT = "https://json-schema.org/draft/2020-12/schema"  # emitted as "$schema"
+GENERATED_NOTE = (  # provenance marker written into the emitted outputs
+    "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit"
+)
+
+
+def field_name(raw):
+    """Return (bare_name, is_optional); a trailing '?' marks a field optional."""
+    optional = raw.endswith("?")
+    bare = raw[:-1] if optional else raw
+    return bare, optional
+
+
+class Node:
+    """A vertex of a flow tree: a step, a group, or a sub-flow embedding."""
+
+    def __init__(self, name, kind, parent, origin):
+        # identity and position in the tree
+        self.name, self.parent = name, parent
+        self.kind = kind  # one of: step | group | embed
+        self.origin = origin  # (flow_name, tuple path inside that flow)
+        # payload, filled in after construction
+        self.mission = ""
+        self.inputs, self.chain_cmds = [], []
+        self.children = []
+        self.workflow = None  # set on embed nodes only: the flow they expand
+        self.replan = False
+        self.preorder = -1  # placeholder until a preorder index is assigned
+
+    def ancestors(self):
+        """Yield the parent, then each further ancestor, ending at the root."""
+        node = self.parent
+        while node is not None:
+            yield node
+            node = node.parent
+
+    def subtree(self):
+        """Yield this node followed by all of its descendants, in preorder."""
+        yield self
+        for child in self.children:
+            yield from child.subtree()
+
+    def path(self):
+        """Names from just below the root down to this node, as a tuple."""
+        lineage = [self, *self.ancestors()]
+        names = [n.name for n in lineage if n.parent is not None]
+        return tuple(reversed(names))
+
+
+class Compiler:
+    def __init__(self, doc):
+        self.doc = doc  # the loaded schemas.yaml document
+        # diagnostics, appended to by err() / warn()
+        self.errors, self.warnings = [], []
+        # symbol tables; empty until check_document fills them
+        self.enums, self.types, self.tasks = {}, {}, {}
+        # flow name -> unparsed flow tree
+        self.flows_raw = {}
+
+    def err(self, ctx, msg):  # errors are collected, never raised
+        self.errors += [f"{ctx}: {msg}"]
+
+    def warn(self, ctx, msg):  # warnings are collected separately from errors
+        self.warnings += [f"{ctx}: {msg}"]
+
+    # ---------- parse + check: sections ----------
+
+    def check_document(self):  # validate each section; fill enums/types/tasks/flows_raw
+        d = self.doc
+        if not isinstance(d, dict):
+            self.err("document", "schemas.yaml is not a mapping")
+            return
+        for key in ("version", "config", "enums", "types", "tasks", "flows"):
+            if key not in d:
+                self.err("document", f"missing top-level section '{key}'")
+        if d.get("version") != SCHEMA_VERSION:  # a missing version also lands here (got None)
+            self.err("version", f"expected {SCHEMA_VERSION}, got {d.get('version')!r}")
+        for k in d:
+            if k not in ("version", "config", "enums", "types", "tasks", "flows"):
+                self.err("document", f"unknown top-level section '{k}'")
+        # --- config: a mapping whose defaults must all be scalars
+        config = d.get("config") or {}
+        if not isinstance(config, dict):
+            self.err("config", "must be a mapping of key: scalar-default")
+        else:
+            for k, v in config.items():
+                if not isinstance(v, (int, float, str, bool)):
+                    self.err(
+                        f"config.{k}",
+                        f"default must be a scalar, got {type(v).__name__}",
+                    )
+        # --- enums: name -> non-empty list of strings
+        enums = d.get("enums") or {}
+        if not isinstance(enums, dict):
+            self.err("enums", "must be a mapping of name: [values]")
+            enums = {}  # keep going so the remaining sections are still checked
+        for name, values in enums.items():
+            ctx = f"enums.{name}"
+            if not isinstance(values, list) or not values:
+                self.err(ctx, "must be a non-empty list")
+                continue
+            if any(not isinstance(v, str) for v in values):
+                self.err(ctx, "values must all be strings")
+                continue
+            if len(set(values)) != len(values):
+                self.err(ctx, "values must be unique")  # reported; the enum is still registered
+            self.enums[name] = values
+        # --- types: every name is registered before any definition is checked
+        types = d.get("types") or {}
+        if not isinstance(types, dict):
+            self.err("types", "must be a mapping of name: definition")
+            types = {}
+        self.types = dict(types)
+
+        # name collisions across the three namespaces a reference can hit
+        for name in self.types:
+            if name in BUILTINS:
+                self.err(f"types.{name}", "collides with a builtin scalar name")
+            if name in self.enums:
+                self.err(f"types.{name}", "collides with an enum name")
+        for name in self.enums:
+            if name in BUILTINS:
+                self.err(f"enums.{name}", "collides with a builtin scalar name")
+        # definitions may reference each other: self.types is already complete
+        for name, definition in self.types.items():
+            self.check_expr(definition, f"types.{name}")
+        # --- tasks: task_type -> {output: {key: type expression}}
+        tasks = d.get("tasks") or {}
+        if not isinstance(tasks, dict):
+            self.err("tasks", "must be a mapping of task_type: {output: ...}")
+            tasks = {}
+        for name, spec in tasks.items():
+            ctx = f"tasks.{name}"
+            if not isinstance(spec, dict) or set(spec) != {"output"}:
+                self.err(ctx, "must be exactly {output: {...}}")
+                continue
+            output = spec["output"]
+            if not isinstance(output, dict) or not output:
+                self.err(ctx, "output must be a non-empty mapping")
+                continue
+            for key, expr in output.items():
+                if key.endswith("?"):  # NOTE(review): assumes string keys; a non-string YAML key would raise here
+                    self.err(
+                        f"{ctx}.output", f"top-level key '{key}' may not be optional"
+                    )
+                self.check_expr(expr, f"{ctx}.output.{key}")
+            if output.get("artifacts") != ["artifact"]:
+                self.err(ctx, "output must include artifacts: [artifact]")
+            self.tasks[name] = output  # registered even if the two checks above reported errors
+        # --- flows: only the container is checked here; the trees are kept raw
+        flows = d.get("flows") or {}
+        if not isinstance(flows, dict):
+            self.err("flows", "must be a mapping of flow_name: tree")
+            flows = {}
+        self.flows_raw = flows
+
+    # ---------- the type expression DSL ----------
+
+    def check_expr(self, expr, ctx):  # validate one type expression; problems are reported under ctx
+        if isinstance(expr, str):  # a name: builtin scalar, enum, or declared type
+            name, opt = field_name(expr)
+            if opt:
+                self.err(
+                    ctx,
+                    "the '?' optional marker belongs on field names, not type names",
+                )
+            if (  # the lookup uses the name with any trailing '?' already removed
+                name not in BUILTINS
+                and name not in self.enums
+                and name not in self.types
+            ):
+                self.err(ctx, f"unknown type or enum '{name}'")
+        elif isinstance(expr, list):  # array type: exactly one element type
+            if len(expr) != 1:
+                self.err(
+                    ctx,
+                    f"array type must have exactly one element type, got {len(expr)}",
+                )
+            else:
+                self.check_expr(expr[0], f"{ctx}[]")
+        elif isinstance(expr, dict):  # inline object: field name -> type expression
+            if not expr:
+                self.err(ctx, "inline object must declare at least one field")
+            for raw, sub in expr.items():
+                name, _ = field_name(raw)  # '?' is legal on field names; drop it for the context
+                self.check_expr(sub, f"{ctx}.{name}")
+        else:  # anything else (numbers, None, ...) is not part of the DSL
+            self.err(ctx, f"unsupported type expression: {expr!r}")
+
+    def type_refs(self, expr):
+        """Walk a type expression, yielding each name found in self.types."""
+        if isinstance(expr, str):
+            if expr in self.types:
+                yield expr
+            return
+        if isinstance(expr, dict):
+            expr = list(expr.values())
+        if isinstance(expr, list):
+            for member in expr:
+                yield from self.type_refs(member)
+
+    def compile_expr(self, expr):
+        """Translate one DSL type expression into a JSON Schema fragment."""
+        if isinstance(expr, dict):
+            return self.compile_object(expr)
+        if isinstance(expr, list) and len(expr) == 1:
+            return {"type": "array", "items": self.compile_expr(expr[0])}
+        if not isinstance(expr, str):
+            return {}
+        if expr in BUILTINS:
+            return dict(BUILTINS[expr])
+        if expr in self.enums:
+            return {"enum": list(self.enums[expr])}
+        # an unknown name yields {}: check_expr has already reported it
+        return {"$ref": f"#/$defs/{expr}"} if expr in self.types else {}
+
+    def compile_object(self, fields):
+        """Compile an inline object; '?'-suffixed fields are not required."""
+        schema = {"type": "object", "properties": {}, "required": []}
+        for raw, sub in fields.items():
+            name, optional = field_name(raw)
+            schema["properties"][name] = self.compile_expr(sub)
+            if optional:
+                continue
+            schema["required"].append(name)
+        # Nested objects are deliberately left open: agent payloads nest
+        # verbatim, so extra nested fields are always permitted (see the
+        # note in schemas.yaml). Only a task's top-level output object is
+        # closed.
+        schema["additionalProperties"] = True
+        return schema
+
+    def type_closure(self, seed):
+        """Return every declared type reachable from the names in seed."""
+        closure, pending = set(), list(seed)
+        while pending:
+            name = pending.pop()
+            # names that are not declared types, or were already visited, are skipped
+            if name in self.types and name not in closure:
+                closure.add(name)
+                pending.extend(self.type_refs(self.types[name]))
+        return closure
+
+    def compile_task_schema(self, task):  # one self-contained JSON Schema for a task's output
+        output = self.tasks[task]
+        props, required = {}, []
+        for key, expr in output.items():
+            name, _ = field_name(key)
+            props[name] = self.compile_expr(expr)
+            required.append(name)  # every top-level key is required
+        deps = {t for expr in output.values() for t in self.type_refs(expr)}
+        defs = {  # every named type reachable from the output, so each $ref resolves locally
+            name: self.compile_expr(self.types[name])
+            for name in sorted(self.type_closure(deps))
+        }
+        schema = {
+            "$schema": JSON_DIALECT,
+            "$id": f"asta-research-step/{task}.schema.json",
+            "$comment": GENERATED_NOTE,
+            "title": task,
+            "type": "object",
+            "properties": props,
+            "required": required,
+            # top level is closed: byproducts go in artifacts
+            "additionalProperties": False,
+        }
+        if defs:  # "$defs" is left out entirely when no named type is referenced
+            schema["$defs"] = defs
+        return schema
+
+    # ---------- flows: parse ----------
+
+    def parse_flow(self, flow_name):  # raw flow mapping -> root Node (kind "group") with parsed children
+        raw = self.flows_raw[flow_name]
+        ctx = f"flows.{flow_name}"
+        root = Node(flow_name, "group", None, (flow_name, ()))  # the root has no parent and an empty path
+        if not isinstance(raw, dict):
+            self.err(ctx, "flow must be a mapping")
+            return root  # a childless root is returned rather than None
+        root.mission = raw.get("mission", "")
+        if not isinstance(root.mission, str) or not root.mission:
+            self.err(ctx, "flow must carry a mission")
+        for key, value in raw.items():
+            if key == "mission":
+                continue
+            if key in RESERVED_NODE_KEYS:  # input / chain / replan make no sense on the root
+                self.err(ctx, f"flow root may not carry '{key}'")
+                continue
+            child = self.parse_node(key, value, root, flow_name, f"{ctx}.{key}")
+            if child is not None:  # parse_node returns None when the value is not a mapping
+                root.children.append(child)
+        if not root.children:
+            self.err(ctx, "flow has no steps")
+        return root
+
+    def parse_node(self, name, raw, parent, flow_name, ctx):
+        """Parse raw into a group, embed or step Node; None if it is no mapping."""
+        if not isinstance(raw, dict):
+            self.err(ctx, "node must be a mapping")
+            return None
+        origin = (flow_name, parent.origin[1] + (name,))  # path within the declaring flow
+        mission = raw.get("mission", "")
+        if not isinstance(mission, str) or not mission:
+            self.err(ctx, "node must carry a mission")
+        # every non-reserved key names a child node and must itself be a mapping
+        child_keys = [k for k in raw if k not in RESERVED_NODE_KEYS]
+        bad_children = [k for k in child_keys if not isinstance(raw[k], dict)]
+        for k in bad_children:
+            self.err(f"{ctx}.{k}", "unknown node attribute (children must be mappings)")
+        child_keys = [k for k in child_keys if k not in bad_children]
+        # input: None means "not given"; an invalid value is reported and becomes []
+        inputs = raw.get("input", None)
+        if inputs is not None and (
+            not isinstance(inputs, list) or any(not isinstance(i, str) for i in inputs)
+        ):
+            self.err(ctx, "input must be a list of node names")
+            inputs = []
+
+        replan = raw.get("replan", False)
+        if not isinstance(replan, bool):
+            self.err(ctx, "replan must be a boolean")
+            replan = False
+        if replan and "chain" in raw:
+            self.err(ctx, "replan is only valid on groups (branches created at replan)")
+        # the node kind follows from `chain`: absent -> group, dict item -> embed, else step
+        if "chain" not in raw:  # group
+            node = Node(name, "group", parent, origin)
+            node.mission = mission
+            node.replan = replan
+            if inputs is not None:
+                self.err(ctx, "groups may not carry input — inputs live on steps")
+            if not child_keys:
+                self.err(ctx, "group has no children (a step needs a chain, even [])")
+            for k in child_keys:
+                child = self.parse_node(k, raw[k], node, flow_name, f"{ctx}.{k}")
+                if child is not None:
+                    node.children.append(child)
+            return node
+
+        chain = raw["chain"]
+        if not isinstance(chain, list):
+            self.err(ctx, "chain must be a list")
+            chain = []
+        workflow_items = [c for c in chain if isinstance(c, dict)]
+
+        if workflow_items:  # sub-flow embedding
+            node = Node(name, "embed", parent, origin)
+            node.mission = mission
+            if len(chain) != 1:
+                self.err(
+                    ctx, "a {workflow: ...} chain item must be the chain's only item"
+                )
+            item = workflow_items[0]
+            if set(item) != {"workflow", "mission"}:
+                self.err(ctx, "workflow chain item must be exactly {workflow, mission}")
+            wf = item.get("workflow")
+            # a non-string wf (e.g. a YAML list) is unhashable: report it, don't crash
+            if not isinstance(wf, str) or wf not in self.flows_raw:
+                self.err(ctx, f"unknown workflow '{wf}'")
+            else:
+                node.workflow = wf
+            if inputs is not None:
+                self.err(ctx, "embed nodes may not carry input")
+            if child_keys:
+                self.err(ctx, "embed nodes may not have children")
+            return node
+
+        # step
+        node = Node(name, "step", parent, origin)
+        node.mission = mission
+        node.inputs = list(inputs or [])
+        if name not in self.tasks:  # a step's name doubles as its task type
+            known = ", ".join(sorted(self.tasks))
+            self.err(ctx, f"step name is not a declared task (known: {known})")
+        for c in chain:  # only well-formed commands are kept on the node
+            if not isinstance(c, str):
+                self.err(ctx, f"chain item must be a string or {{workflow}}: {c!r}")
+            elif not c.startswith("asta "):
+                self.err(
+                    ctx, f"chain command does not look like an asta command: '{c}'"
+                )
+            else:
+                node.chain_cmds.append(c)
+        if child_keys:
+            self.err(ctx, f"steps may not have children: {', '.join(child_keys)}")
+        return node
+
+    # ---------- flows: expansion + input resolution ----------
+
+    def expand_flow(self, flow_name, parsed, stack=()):
+        """Build a fresh expanded tree for flow_name; embeds become groups
+        wrapping the embedded flow's (recursively expanded) children."""
+        if flow_name in stack:  # stack holds the flows currently being expanded
+            self.err(
+                f"flows.{flow_name}",
+                f"workflow embedding cycle: {' -> '.join(stack + (flow_name,))}",
+            )
+            return None
+
+        def clone(node, parent):  # deep copy, so the parsed trees are never mutated
+            copy = Node(node.name, node.kind, parent, node.origin)
+            copy.mission, copy.inputs = node.mission, list(node.inputs)
+            copy.chain_cmds, copy.replan = list(node.chain_cmds), node.replan
+            copy.workflow = node.workflow
+            if node.kind == "embed" and node.workflow:
+                inner = self.expand_flow(node.workflow, parsed, stack + (flow_name,))
+                if inner is not None:  # None: a cycle was reported; the embed stays childless
+                    for c in inner.children:
+                        c.parent = copy  # splice: the embed replaces the inner flow's root
+                        copy.children.append(c)
+            else:  # also taken by an embed whose workflow did not resolve
+                for c in node.children:
+                    copy.children.append(clone(c, copy))
+            return copy
+
+        root = clone(parsed[flow_name], None)
+        for i, n in enumerate(root.subtree()):  # renumber: nested calls numbered from their own root
+            n.preorder = i
+        return root
+
+    @staticmethod
+    def resolve_input(step, name):
+        """Nearest-ancestor-scope, upstream-only resolution: walk outward from
+        the step; in each scope pick the closest preceding node with that name."""
+        ancestors = set(step.ancestors())  # enclosing nodes never satisfy an input
+        for scope in step.ancestors():  # innermost enclosing scope first
+            candidates = [
+                n
+                for n in scope.subtree()
+                if n.name == name and n.preorder < step.preorder and n not in ancestors
+            ]
+            if candidates:
+                return max(candidates, key=lambda n: n.preorder)  # the latest upstream match
+        return None  # nothing upstream carries that name in any enclosing scope
+
+    def check_flows(self):  # parse, expand and input-check all flows; {} when any error exists
+        parsed = {name: self.parse_flow(name) for name in self.flows_raw}
+        if self.errors:  # any error recorded so far, not only those from parsing flows
+            # expansion and resolution on a broken parse just compounds noise
+            return {}
+        expanded = {name: self.expand_flow(name, parsed) for name in self.flows_raw}
+        if any(v is None for v in expanded.values()) or self.errors:
+            return {}
+
+        # (origin_flow, origin_path, input) -> {context_flow: resolved?}
+        results = {}
+        for context, root in expanded.items():
+            for node in root.subtree():
+                for inp in node.inputs:
+                    key = (node.origin[0], node.origin[1], inp)  # identifies the declaring step
+                    results.setdefault(key, {})[context] = (
+                        self.resolve_input(node, inp) is not None
+                    )
+        # an input is an error only if it resolves in none of the flows that contain the step
+        for (flow, path, inp), by_context in sorted(results.items()):
+            if not any(by_context.values()):
+                ctx = f"flows.{flow}.{'.'.join(path)}"
+                contexts = ", ".join(sorted(by_context))
+                self.err(
+                    ctx,
+                    f"input '{inp}' does not resolve to any upstream node "
+                    f"in any context (checked: {contexts})",
+                )
+        return expanded
+
+    # ---------- script compatibility ----------
+
+    def check_scripts(self, skill_dir):  # diff task-output-keys.sh against self.tasks
+        script = Path(skill_dir) / "scripts" / "task-output-keys.sh"
+        if not script.is_file():
+            self.err("scripts", f"{script} not found")
+            return
+
+        def query(task):  # NOTE(review): no timeout; a missing bash raises out of future.result()
+            return subprocess.run(
+                ["bash", str(script), task], capture_output=True, text=True
+            )
+
+        # each call is bash -> python3 -> full YAML parse; run them concurrently
+        with ThreadPoolExecutor(max_workers=8) as pool:
+            futures = {task: pool.submit(query, task) for task in sorted(self.tasks)}
+            bogus = pool.submit(query, "definitely_not_a_task")
+        # leaving the with-block waits for every submitted call to finish
+        for task, future in futures.items():
+            r = future.result()
+            if r.returncode != 0:
+                self.err(
+                    f"scripts.task-output-keys.{task}",
+                    f"exited {r.returncode}: {r.stderr.strip()}",
+                )
+                continue
+            got = r.stdout.split()  # whitespace-separated key names
+            want = [field_name(k)[0] for k in self.tasks[task]]  # order matters: lists are compared
+            if got != want:
+                self.err(
+                    f"scripts.task-output-keys.{task}",
+                    f"key drift: script says [{' '.join(got)}], schema says [{' '.join(want)}]",
+                )
+        if bogus.result().returncode != 3:  # the script must exit 3 for an unknown task type
+            self.err(
+                "scripts.task-output-keys",
+                f"unknown task_type should exit 3, got {bogus.result().returncode}",
+            )
+
+    # ---------- mermaid emission ----------
+
+    @staticmethod
+    def node_id(node):  # id built from the node's path; an empty path (the root) gives "root"
+        return re.sub(r"[^A-Za-z0-9_]", "_", "__".join(node.path())) or "root"
+
+    @staticmethod
+    def label(text):  # labels are emitted inside "..."; swap each double quote for #quot;
+        return text.replace('"', "#quot;")
+
+    def emit_mermaid(self, flow_name, root):  # render one expanded flow tree as mermaid text
+        lines = [
+            f"%% {flow_name} — {GENERATED_NOTE}",
+            "%% solid arrows: input wiring · dashed: inputs satisfied outside this flow",
+            "flowchart TD",
+            "  classDef replan stroke-dasharray: 6 4",
+            "  classDef embed fill:#eef6ff,stroke:#6699cc",
+            "  classDef external stroke-dasharray: 3 3,color:#888888",
+        ]
+        edges, externals = [], {}  # externals: input name -> synthetic node id
+
+        def walk(node, depth):  # steps become boxes; groups and embeds become subgraphs
+            pad = "  " * depth
+            nid = self.node_id(node)
+            if node.kind == "step":
+                text = node.name
+                if node.chain_cmds:
+                    text += "<br/>" + " · ".join(node.chain_cmds)
+                lines.append(f'{pad}{nid}["{self.label(text)}"]')
+                return
+            title = node.name
+            if node.kind == "embed":
+                title += f" [flow: {node.workflow}]"
+            if node.replan:
+                title += " (at replan)"
+            lines.append(f'{pad}subgraph {nid}["{self.label(title)}"]')
+            for c in node.children:
+                walk(c, depth + 1)
+            lines.append(f"{pad}end")
+            if node.kind == "embed":  # class statements follow the closed subgraph
+                lines.append(f"{pad}class {nid} embed")
+            if node.replan:
+                lines.append(f"{pad}class {nid} replan")
+
+        for child in root.children:  # the flow root itself is not drawn
+            walk(child, 1)
+        # edges are collected separately and appended after all node declarations
+        for node in root.subtree():
+            for inp in node.inputs:
+                src = self.resolve_input(node, inp)
+                if src is not None:
+                    edges.append(f"  {self.node_id(src)} --> {self.node_id(node)}")
+                else:  # unresolved here: draw a dashed edge from a shared external node
+                    ext = externals.setdefault(
+                        inp, f"ext__{re.sub(r'[^A-Za-z0-9_]', '_', inp)}"
+                    )
+                    edges.append(f"  {ext} -.-> {self.node_id(node)}")
+
+        for inp, ext in sorted(externals.items()):
+            lines.append(f'  {ext}(["{self.label(inp)} (external)"]):::external')
+        lines.extend(edges)
+        return "\n".join(lines) + "\n"
+
+    # ---------- flows.json emission ----------
+
+    def emit_flow_graph(self, root):
+        """One flow as a graph: the same expanded tree and input resolution the
+        mermaid emitter walks, as data. Nodes come in preorder so a node's
+        `parent` always appears earlier (the React Flow sub-flow contract)."""
+        nodes = []
+        for node in root.subtree():
+            if node is root:  # the flow root is represented by the graph itself
+                continue
+            entry = {
+                "id": self.node_id(node),
+                "parent": None if node.parent is root else self.node_id(node.parent),
+                "kind": node.kind,
+                "name": node.name,
+                "mission": node.mission,
+                "replan": node.replan,
+            }
+            if node.kind == "step":  # a step's name is also its task type
+                entry["task"] = node.name
+                entry["chain"] = list(node.chain_cmds)
+                entry["inputs"] = list(node.inputs)
+            elif node.kind == "embed":
+                entry["workflow"] = node.workflow
+            nodes.append(entry)
+
+        edges, externals = [], {}  # externals: input name -> synthetic node id
+        for node in root.subtree():
+            for inp in node.inputs:
+                src = self.resolve_input(node, inp)
+                if src is not None:
+                    source = self.node_id(src)
+                    external = False
+                else:  # unresolved in this flow: the edge starts at a synthetic external node
+                    source = externals.setdefault(
+                        inp, f"ext__{re.sub(r'[^A-Za-z0-9_]', '_', inp)}"
+                    )
+                    external = True
+                edges.append({
+                    "source": source,
+                    "target": self.node_id(node),
+                    "input": inp,
+                    "external": external,
+                })
+        # inputs satisfied outside this flow: synthetic nodes, like the
+        # mermaid emitter's dashed "(external)" stadiums
+        for inp, ext in sorted(externals.items()):
+            entry = {
+                "id": ext,
+                "parent": None,
+                "kind": "external",
+                "name": inp,
+                "mission": "",
+                "replan": False,
+            }
+            if inp in self.tasks:  # "task" is only set when the input names a declared task
+                entry["task"] = inp
+            nodes.append(entry)
+        return {"mission": root.mission, "nodes": nodes, "edges": edges}
+
+    def emit_flows_doc(self, expanded, schemas):
+        """The whole flows.json document. Task schemas are embedded once,
+        keyed by task name (nodes reference them via their `task` field), so
+        a renderer needs no further fetches; `output` is the raw mini-DSL
+        expression for compact key/type display."""
+        return {
+            "format_version": FLOWS_FORMAT_VERSION,
+            "schema_version": SCHEMA_VERSION,
+            "$comment": GENERATED_NOTE,
+            "tasks": {
+                task: {"output": self.tasks[task], "schema": schemas[task]}
+                for task in sorted(self.tasks)
+            },
+            "flows": {
+                name: self.emit_flow_graph(root) for name, root in expanded.items()
+            },
+        }
+
+    # ---------- driver ----------
+
+    def run(self, skill_dir=None, script_check=True):
+        self.check_document()
+        expanded = self.check_flows()
+
+        # unreachable-type warning: everything should hang off some task
+        if self.tasks:
+            seeds = {
+                t
+                for output in self.tasks.values()
+                for expr in output.values()
+                for t in self.type_refs(expr)
+            }
+            reachable = self.type_closure(seeds)
+            for name in self.types:
+                if name not in reachable:
+                    self.warn(f"types.{name}", "not reachable from any task output")
+
+        if script_check and skill_dir is not None and not self.errors:
+            self.check_scripts(skill_dir)
+
+        schemas = {}
+        diagrams = {}
+        flows_doc = {}
+        if not self.errors:
+            schemas = {task: self.compile_task_schema(task) for task in self.tasks}
+            diagrams = {
+                name: self.emit_mermaid(name, root) for name, root in expanded.items()
+            }
+            flows_doc = self.emit_flows_doc(expanded, schemas)
+        return schemas, diagrams, flows_doc
+
+
+def main(argv=None):
+    repo = Path(__file__).resolve().parent.parent
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    ap.add_argument(
+        "--skill-dir",
+        type=Path,
+        default=repo / "skills" / "research-step",
+        help="the research-step skill directory (default: skills/research-step)",
+    )
+    ap.add_argument(
+        "--no-script-check",
+        action="store_true",
+        help="skip the task-output-keys.sh compatibility check",
+    )
+    ap.add_argument(
+        "--check",
+        action="store_true",
+        help="validate only; do not write assets/compiled/",
+    )
+    args = ap.parse_args(argv)
+
+    try:
+        import yaml
+    except ImportError:
+        print(
+            "compile-schemas: python3 cannot import yaml (PyYAML) — pip install pyyaml",
+            file=sys.stderr,
+        )
+        return 1
+
+    schemas_path = args.skill_dir / "assets" / "schemas.yaml"
+    try:
+        with open(schemas_path) as f:
+            doc = yaml.safe_load(f)
+    except Exception as e:
+        print(f"compile-schemas: cannot read {schemas_path}: {e}", file=sys.stderr)
+        return 1
+
+    compiler = Compiler(doc)
+    schemas, diagrams, flows_doc = compiler.run(
+        skill_dir=args.skill_dir, script_check=not args.no_script_check
+    )
+
+    for w in compiler.warnings:
+        print(f"compile-schemas: warning: {w}", file=sys.stderr)
+    if compiler.errors:
+        for e in compiler.errors:
+            print(f"compile-schemas: error: {e}", file=sys.stderr)
+        print(f"compile-schemas: {len(compiler.errors)} error(s)", file=sys.stderr)
+        return 1
+
+    if not args.check:
+        out_dir = args.skill_dir / "assets" / "compiled"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        for stale in (
+            list(out_dir.glob("*.schema.json"))
+            + list(out_dir.glob("*.mmd"))
+            + list(out_dir.glob("flows.json"))
+        ):
+            stale.unlink()
+        for task, schema in schemas.items():
+            (out_dir / f"{task}.schema.json").write_text(
+                json.dumps(schema, indent=2, sort_keys=True) + "\n"
+            )
+        for flow, mmd in diagrams.items():
+            (out_dir / f"{flow}.mmd").write_text(mmd)
+        (out_dir / "flows.json").write_text(
+            json.dumps(flows_doc, indent=2, sort_keys=True) + "\n"
+        )
+
+    print(
+        f"compile-schemas: ok — {len(compiler.types)} types, "
+        f"{len(schemas)} task schemas, {len(diagrams)} flow diagrams, flows.json"
+        + ("" if args.check else f" -> {args.skill_dir / 'assets' / 'compiled'}")
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/skills/research-step/assets/compiled/adjudicate.schema.json b/skills/research-step/assets/compiled/adjudicate.schema.json
new file mode 100644
index 0000000..ccfb9d1
--- /dev/null
+++ b/skills/research-step/assets/compiled/adjudicate.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/adjudicate.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "artifacts"
+  ],
+  "title": "adjudicate",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/analysis.schema.json b/skills/research-step/assets/compiled/analysis.schema.json
new file mode 100644
index 0000000..55e557d
--- /dev/null
+++ b/skills/research-step/assets/compiled/analysis.schema.json
@@ -0,0 +1,119 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "analysis": {
+      "additionalProperties": true,
+      "properties": {
+        "assumptions": {
+          "type": "string"
+        },
+        "code": {
+          "type": "string"
+        },
+        "final_answer": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "final_answer",
+        "assumptions",
+        "code"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/analysis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "analysis": {
+      "$ref": "#/$defs/analysis"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "analysis",
+    "figures",
+    "artifacts"
+  ],
+  "title": "analysis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/audit.schema.json b/skills/research-step/assets/compiled/audit.schema.json
new file mode 100644
index 0000000..ca21120
--- /dev/null
+++ b/skills/research-step/assets/compiled/audit.schema.json
@@ -0,0 +1,127 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "audit_report": {
+      "additionalProperties": true,
+      "properties": {
+        "artifacts_found": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "challenges": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "check": {
+                "type": "string"
+              },
+              "concern": {
+                "type": "string"
+              },
+              "outcome": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "concern",
+              "check",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "recommended_adjustment": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "verdict_survives": {
+          "type": "boolean"
+        }
+      },
+      "required": [
+        "subject_id",
+        "challenges",
+        "artifacts_found",
+        "verdict_survives",
+        "recommended_adjustment"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/audit.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "audit_report": {
+      "$ref": "#/$defs/audit_report"
+    }
+  },
+  "required": [
+    "audit_report",
+    "artifacts"
+  ],
+  "title": "audit",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/cohort_assembly.schema.json b/skills/research-step/assets/compiled/cohort_assembly.schema.json
new file mode 100644
index 0000000..4866540
--- /dev/null
+++ b/skills/research-step/assets/compiled/cohort_assembly.schema.json
@@ -0,0 +1,206 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "cohort": {
+      "additionalProperties": true,
+      "properties": {
+        "discovery_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "exclusion_criteria": {
+          "type": "string"
+        },
+        "holdout_subset": {
+          "additionalProperties": true,
+          "properties": {
+            "definition": {
+              "type": "string"
+            },
+            "n": {
+              "type": "number"
+            },
+            "path": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "definition",
+            "n",
+            "path"
+          ],
+          "type": "object"
+        },
+        "id": {
+          "type": "string"
+        },
+        "inclusion_criteria": {
+          "type": "string"
+        },
+        "research_question": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source_data_sources": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "research_question",
+        "inclusion_criteria",
+        "exclusion_criteria",
+        "sampling",
+        "source_data_sources",
+        "discovery_subset",
+        "holdout_subset",
+        "run_id"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/cohort_assembly.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "cohort": {
+      "$ref": "#/$defs/cohort"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "cohort",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "cohort_assembly",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/data_acquisition.schema.json b/skills/research-step/assets/compiled/data_acquisition.schema.json
new file mode 100644
index 0000000..0bec23c
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_acquisition.schema.json
@@ -0,0 +1,161 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "acquisition": {
+      "additionalProperties": true,
+      "properties": {
+        "access_status": {
+          "enum": [
+            "acquired",
+            "open_unfetched",
+            "restricted",
+            "not_found"
+          ]
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "dataset_id": {
+          "type": "string"
+        },
+        "local_path": {
+          "type": "string"
+        },
+        "validation_note": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "access_status",
+        "local_path",
+        "dataset_id",
+        "validation_note"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_acquisition.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "acquisitions": {
+      "items": {
+        "$ref": "#/$defs/acquisition"
+      },
+      "type": "array"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "acquisitions",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_acquisition",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
new file mode 100644
index 0000000..cb56eed
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd
@@ -0,0 +1,92 @@
+%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit
+%% solid arrows: input wiring · dashed: inputs satisfied outside this flow
+flowchart TD
+  classDef replan stroke-dasharray: 6 4
+  classDef embed fill:#eef6ff,stroke:#6699cc
+  classDef external stroke-dasharray: 3 3,color:#888888
+  subgraph data_provenance["data_provenance [flow: data_provenance]"]
+    data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"]
+    data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"]
+    data_provenance__provenance_synthesis["provenance_synthesis"]
+  end
+  class data_provenance embed
+  subgraph reproduction["reproduction [flow: reproduction]"]
+    reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"]
+    reproduction__law_extraction["law_extraction"]
+    reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"]
+    subgraph reproduction__replication["replication (at replan)"]
+      reproduction__replication__experiment_design["experiment_design<br/>asta experiment"]
+      reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+      reproduction__replication__adjudicate["adjudicate"]
+    end
+    class reproduction__replication replan
+    reproduction__reproduction_synthesis["reproduction_synthesis"]
+  end
+  class reproduction embed
+  subgraph theorizer["theorizer [flow: theorizer]"]
+    theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"]
+    subgraph theorizer__theory_generation["theory_generation"]
+      theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"]
+    end
+    theorizer__testability_triage["testability_triage"]
+    theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"]
+    theorizer__theory_synthesis["theory_synthesis"]
+  end
+  class theorizer embed
+  subgraph verification["verification (at replan)"]
+    verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"]
+    verification__adjudicate["adjudicate"]
+  end
+  class verification replan
+  verification_synthesis["verification_synthesis"]
+  gap_synthesis["gap_synthesis"]
+  final_synthesis["final_synthesis"]
+  data_provenance__provenance_search --> data_provenance__provenance_extraction
+  data_provenance__provenance_search --> data_provenance__data_acquisition
+  data_provenance__provenance_extraction --> data_provenance__data_acquisition
+  data_provenance__provenance_search --> data_provenance__provenance_synthesis
+  data_provenance__provenance_extraction --> data_provenance__provenance_synthesis
+  data_provenance__data_acquisition --> data_provenance__provenance_synthesis
+  reproduction__data_driven_discovery --> reproduction__law_extraction
+  reproduction__law_extraction --> reproduction__evidence_gathering
+  reproduction__law_extraction --> reproduction__replication__experiment_design
+  reproduction__evidence_gathering --> reproduction__replication__experiment_design
+  reproduction__replication__experiment_design --> reproduction__replication__analysis
+  reproduction__evidence_gathering --> reproduction__replication__analysis
+  reproduction__replication__analysis --> reproduction__replication__audit
+  reproduction__replication__experiment_design --> reproduction__replication__adjudicate
+  reproduction__replication__analysis --> reproduction__replication__adjudicate
+  reproduction__replication__audit --> reproduction__replication__adjudicate
+  reproduction__law_extraction --> reproduction__reproduction_synthesis
+  reproduction__replication --> reproduction__reproduction_synthesis
+  reproduction__law_extraction --> theorizer__evidence_extraction
+  reproduction__replication__adjudicate --> theorizer__evidence_extraction
+  theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation
+  theorizer__theory_generation --> theorizer__testability_triage
+  reproduction__data_driven_discovery --> theorizer__testability_triage
+  reproduction__evidence_gathering --> theorizer__testability_triage
+  theorizer__testability_triage --> theorizer__novelty_assessment
+  theorizer__theory_generation --> theorizer__theory_synthesis
+  theorizer__novelty_assessment --> theorizer__theory_synthesis
+  theorizer__testability_triage --> theorizer__theory_synthesis
+  theorizer__testability_triage --> verification__analysis
+  reproduction__data_driven_discovery --> verification__analysis
+  reproduction__evidence_gathering --> verification__analysis
+  verification__analysis --> verification__audit
+  theorizer__testability_triage --> verification__adjudicate
+  verification__analysis --> verification__adjudicate
+  verification__audit --> verification__adjudicate
+  verification --> verification_synthesis
+  theorizer__novelty_assessment --> verification_synthesis
+  data_provenance__provenance_synthesis --> gap_synthesis
+  reproduction__reproduction_synthesis --> gap_synthesis
+  theorizer__theory_synthesis --> gap_synthesis
+  verification_synthesis --> gap_synthesis
+  data_provenance__provenance_synthesis --> final_synthesis
+  reproduction__reproduction_synthesis --> final_synthesis
+  theorizer__theory_synthesis --> final_synthesis
+  verification_synthesis --> final_synthesis
+  gap_synthesis --> final_synthesis
diff --git a/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/skills/research-step/assets/compiled/data_driven_discovery.schema.json
new file mode 100644
index 0000000..14f65a7
--- /dev/null
+++ b/skills/research-step/assets/compiled/data_driven_discovery.schema.json
@@ -0,0 +1,152 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/data_driven_discovery.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "datasets",
+    "artifacts"
+  ],
+  "title": "data_driven_discovery",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/discovery_run.schema.json b/skills/research-step/assets/compiled/discovery_run.schema.json
new file mode 100644
index 0000000..b7ac259
--- /dev/null
+++ b/skills/research-step/assets/compiled/discovery_run.schema.json
@@ -0,0 +1,170 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "experiment": {
+      "additionalProperties": true,
+      "properties": {
+        "analysis": {
+          "type": "string"
+        },
+        "experiment_id": {
+          "type": "string"
+        },
+        "hypothesis": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "experiment_id",
+        "status",
+        "hypothesis",
+        "analysis"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_run.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    },
+    "experiments": {
+      "items": {
+        "$ref": "#/$defs/experiment"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "experiments",
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "discovery_run",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/skills/research-step/assets/compiled/discovery_synthesis.schema.json
new file mode 100644
index 0000000..29cb31f
--- /dev/null
+++ b/skills/research-step/assets/compiled/discovery_synthesis.schema.json
@@ -0,0 +1,271 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "discovery_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "interpretation": {
+          "type": "string"
+        },
+        "laws": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "deciding_experiment": {
+                "type": "string"
+              },
+              "effect_size_discovery": {
+                "type": "string"
+              },
+              "effect_size_holdout": {
+                "type": "string"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "surprise": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "surprise",
+              "outcome",
+              "deciding_experiment",
+              "effect_size_discovery",
+              "effect_size_holdout"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "run_id": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "run_id",
+        "laws",
+        "interpretation",
+        "next_steps",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/discovery_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "discovery_report": {
+      "$ref": "#/$defs/discovery_report"
+    }
+  },
+  "required": [
+    "discovery_report",
+    "artifacts"
+  ],
+  "title": "discovery_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/evidence_extraction.schema.json b/skills/research-step/assets/compiled/evidence_extraction.schema.json
new file mode 100644
index 0000000..7a53a5b
--- /dev/null
+++ b/skills/research-step/assets/compiled/evidence_extraction.schema.json
@@ -0,0 +1,132 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "artifacts"
+  ],
+  "title": "evidence_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/evidence_gathering.schema.json b/skills/research-step/assets/compiled/evidence_gathering.schema.json
new file mode 100644
index 0000000..c310796
--- /dev/null
+++ b/skills/research-step/assets/compiled/evidence_gathering.schema.json
@@ -0,0 +1,121 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "dataset": {
+      "additionalProperties": true,
+      "properties": {
+        "covers_laws": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "definition": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "n": {
+          "type": "number"
+        },
+        "sampling": {
+          "type": "string"
+        },
+        "source": {
+          "type": "string"
+        },
+        "variables": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "id",
+        "definition",
+        "source",
+        "n",
+        "sampling",
+        "variables",
+        "covers_laws"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/evidence_gathering.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "datasets": {
+      "items": {
+        "$ref": "#/$defs/dataset"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "datasets",
+    "artifacts"
+  ],
+  "title": "evidence_gathering",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/experiment_design.schema.json b/skills/research-step/assets/compiled/experiment_design.schema.json
new file mode 100644
index 0000000..458fe42
--- /dev/null
+++ b/skills/research-step/assets/compiled/experiment_design.schema.json
@@ -0,0 +1,162 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "experiment_design": {
+      "additionalProperties": true,
+      "properties": {
+        "construct_equivalence": {
+          "enum": [
+            "equivalent",
+            "proxy",
+            "mismatch"
+          ]
+        },
+        "data_gap": {
+          "type": "string"
+        },
+        "experiment_design_query": {
+          "type": "string"
+        },
+        "experiment_name": {
+          "type": "string"
+        },
+        "feasibility": {
+          "enum": [
+            "feasible",
+            "proxy_only",
+            "data_unavailable",
+            "construct_mismatch"
+          ]
+        },
+        "independent_operationalization": {
+          "type": "string"
+        },
+        "plain_language_description": {
+          "type": "string"
+        },
+        "prespecified": {
+          "additionalProperties": true,
+          "properties": {
+            "metric": {
+              "type": "string"
+            },
+            "success_threshold": {
+              "type": "string"
+            },
+            "test": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "test",
+            "metric",
+            "success_threshold"
+          ],
+          "type": "object"
+        },
+        "required_data": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "experiment_name",
+        "plain_language_description",
+        "source_operationalization",
+        "independent_operationalization",
+        "construct_equivalence",
+        "feasibility",
+        "required_data",
+        "data_gap",
+        "experiment_design_query",
+        "prespecified"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/experiment_design.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "experiment_design": {
+      "$ref": "#/$defs/experiment_design"
+    }
+  },
+  "required": [
+    "experiment_design",
+    "artifacts"
+  ],
+  "title": "experiment_design",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/final_synthesis.schema.json b/skills/research-step/assets/compiled/final_synthesis.schema.json
new file mode 100644
index 0000000..b00f085
--- /dev/null
+++ b/skills/research-step/assets/compiled/final_synthesis.schema.json
@@ -0,0 +1,289 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "research_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "inference_chain": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "chain": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "claim": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "claim",
+              "chain"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sub_reports": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "report_path",
+              "one_line"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "tensions_and_surprises": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "evidence": {
+                "type": "string"
+              },
+              "observation": {
+                "type": "string"
+              },
+              "where": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "observation",
+              "where",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_highlights": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "claim": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_was_done": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theory_highlights",
+        "inference_chain",
+        "what_was_done",
+        "sub_reports",
+        "tensions_and_surprises",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/final_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "research_report": {
+      "$ref": "#/$defs/research_report"
+    }
+  },
+  "required": [
+    "research_report",
+    "artifacts"
+  ],
+  "title": "final_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/flows.json b/skills/research-step/assets/compiled/flows.json
new file mode 100644
index 0000000..907a432
--- /dev/null
+++ b/skills/research-step/assets/compiled/flows.json
@@ -0,0 +1,6657 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "flows": {
+    "auto_discovery": {
+      "edges": [
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "discovery_run"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "cohort_assembly",
+          "source": "cohort_assembly",
+          "target": "replication__holdout_replication"
+        },
+        {
+          "external": false,
+          "input": "discovery_run",
+          "source": "discovery_run",
+          "target": "discovery_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "discovery_synthesis"
+        }
+      ],
+      "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta documents",
+            "asta generate-theories find-and-extract",
+            "asta autodiscovery create",
+            "asta autodiscovery upload",
+            "asta autodiscovery metadata"
+          ],
+          "id": "cohort_assembly",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.",
+          "name": "cohort_assembly",
+          "parent": null,
+          "replan": false,
+          "task": "cohort_assembly"
+        },
+        {
+          "chain": [
+            "asta autodiscovery submit",
+            "asta autodiscovery experiments"
+          ],
+          "id": "discovery_run",
+          "inputs": [
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws.",
+          "name": "discovery_run",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_run"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__holdout_replication",
+          "inputs": [
+            "discovery_run",
+            "cohort_assembly"
+          ],
+          "kind": "step",
+          "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "holdout_replication",
+          "parent": "replication",
+          "replan": false,
+          "task": "holdout_replication"
+        },
+        {
+          "chain": [],
+          "id": "discovery_synthesis",
+          "inputs": [
+            "discovery_run",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap.",
+          "name": "discovery_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "discovery_synthesis"
+        }
+      ]
+    },
+    "data_and_literature_grounded_theory_generation": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "data_provenance__provenance_search",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "data_provenance__provenance_extraction",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_provenance__data_acquisition",
+          "target": "data_provenance__provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "reproduction__law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "reproduction__replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "reproduction__replication__experiment_design",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "reproduction__replication__analysis",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "reproduction__replication__audit",
+          "target": "reproduction__replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "reproduction__replication",
+          "target": "reproduction__reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "reproduction__law_extraction",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "adjudicate",
+          "source": "reproduction__replication__adjudicate",
+          "target": "theorizer__evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "theorizer__evidence_extraction",
+          "target": "theorizer__theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "theorizer__testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theorizer__theory_generation",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "theorizer__theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "reproduction__data_driven_discovery",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "reproduction__evidence_gathering",
+          "target": "verification__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__audit"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "theorizer__testability_triage",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "verification__analysis",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "verification__audit",
+          "target": "verification__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "verification",
+          "source": "verification",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "theorizer__novelty_assessment",
+          "target": "verification_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "gap_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_synthesis",
+          "source": "data_provenance__provenance_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "reproduction_synthesis",
+          "source": "reproduction__reproduction_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "theory_synthesis",
+          "source": "theorizer__theory_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "verification_synthesis",
+          "source": "verification_synthesis",
+          "target": "final_synthesis"
+        },
+        {
+          "external": false,
+          "input": "gap_synthesis",
+          "source": "gap_synthesis",
+          "target": "final_synthesis"
+        }
+      ],
+      "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.",
+      "nodes": [
+        {
+          "id": "data_provenance",
+          "kind": "embed",
+          "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.",
+          "name": "data_provenance",
+          "parent": null,
+          "replan": false,
+          "workflow": "data_provenance"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "data_provenance__provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "data_provenance__provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_provenance__data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "data_provenance__provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": "data_provenance",
+          "replan": false,
+          "task": "provenance_synthesis"
+        },
+        {
+          "id": "reproduction",
+          "kind": "embed",
+          "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.",
+          "name": "reproduction",
+          "parent": null,
+          "replan": false,
+          "workflow": "reproduction"
+        },
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "reproduction__data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "reproduction__evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "reproduction__replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": "reproduction",
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "reproduction__replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "reproduction__replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "reproduction__replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction__reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": "reproduction",
+          "replan": false,
+          "task": "reproduction_synthesis"
+        },
+        {
+          "id": "theorizer",
+          "kind": "embed",
+          "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.",
+          "name": "theorizer",
+          "parent": null,
+          "replan": false,
+          "workflow": "theorizer"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "theorizer__evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theorizer__theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": "theorizer",
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theorizer__theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theorizer__theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "theorizer__novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theorizer__theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": "theorizer",
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "verification",
+          "kind": "group",
+          "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.",
+          "name": "verification",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__analysis",
+          "inputs": [
+            "testability_triage",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "verification",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "verification__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "verification",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "verification__adjudicate",
+          "inputs": [
+            "testability_triage",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.",
+          "name": "adjudicate",
+          "parent": "verification",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "verification_synthesis",
+          "inputs": [
+            "verification",
+            "novelty_assessment"
+          ],
+          "kind": "step",
+          "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether it survived the audit), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.",
+          "name": "verification_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "verification_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "gap_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it.",
+          "name": "gap_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "gap_synthesis"
+        },
+        {
+          "chain": [],
+          "id": "final_synthesis",
+          "inputs": [
+            "provenance_synthesis",
+            "reproduction_synthesis",
+            "theory_synthesis",
+            "verification_synthesis",
+            "gap_synthesis"
+          ],
+          "kind": "step",
+          "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.",
+          "name": "final_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "final_synthesis"
+        }
+      ]
+    },
+    "data_provenance": {
+      "edges": [
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_extraction"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "provenance_search",
+          "source": "provenance_search",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "provenance_extraction",
+          "source": "provenance_extraction",
+          "target": "provenance_synthesis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "data_acquisition",
+          "target": "provenance_synthesis"
+        }
+      ],
+      "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "provenance_search",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).",
+          "name": "provenance_search",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_search"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "provenance_extraction",
+          "inputs": [
+            "provenance_search"
+          ],
+          "kind": "step",
+          "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.",
+          "name": "provenance_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_extraction"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "data_acquisition",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction"
+          ],
+          "kind": "step",
+          "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work.",
+          "name": "data_acquisition",
+          "parent": null,
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [],
+          "id": "provenance_synthesis",
+          "inputs": [
+            "provenance_search",
+            "provenance_extraction",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.",
+          "name": "provenance_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "provenance_synthesis"
+        }
+      ]
+    },
+    "hypothesis_driven_research": {
+      "edges": [
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "hypothesis_formation"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "literature_review",
+          "source": "literature_review",
+          "target": "testing__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__data_acquisition"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "data_acquisition",
+          "source": "testing__data_acquisition",
+          "target": "testing__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "testing__experiment_design",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "testing__analysis",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "testing__audit",
+          "target": "testing__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "hypothesis_formation",
+          "source": "hypothesis_formation",
+          "target": "hypothesis_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testing",
+          "source": "testing",
+          "target": "hypothesis_synthesis"
+        }
+      ],
+      "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.",
+      "nodes": [
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search"
+          ],
+          "id": "literature_review",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.",
+          "name": "literature_review",
+          "parent": null,
+          "replan": false,
+          "task": "literature_review"
+        },
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "hypothesis_formation",
+          "inputs": [
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.",
+          "name": "hypothesis_formation",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_formation"
+        },
+        {
+          "id": "testing",
+          "kind": "group",
+          "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.",
+          "name": "testing",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "testing__experiment_design",
+          "inputs": [
+            "hypothesis_formation",
+            "literature_review"
+          ],
+          "kind": "step",
+          "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "testing",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "testing__data_acquisition",
+          "inputs": [
+            "experiment_design"
+          ],
+          "kind": "step",
+          "mission": "Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.",
+          "name": "data_acquisition",
+          "parent": "testing",
+          "replan": false,
+          "task": "data_acquisition"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__analysis",
+          "inputs": [
+            "experiment_design",
+            "data_acquisition"
+          ],
+          "kind": "step",
+          "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "testing",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "testing__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "testing",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "testing__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.",
+          "name": "adjudicate",
+          "parent": "testing",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "hypothesis_synthesis",
+          "inputs": [
+            "hypothesis_formation",
+            "testing"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.",
+          "name": "hypothesis_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "hypothesis_synthesis"
+        }
+      ]
+    },
+    "reproduction": {
+      "edges": [
+        {
+          "external": false,
+          "input": "data_driven_discovery",
+          "source": "data_driven_discovery",
+          "target": "law_extraction"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "evidence_gathering"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__experiment_design"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "evidence_gathering",
+          "source": "evidence_gathering",
+          "target": "replication__analysis"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__audit"
+        },
+        {
+          "external": false,
+          "input": "experiment_design",
+          "source": "replication__experiment_design",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "analysis",
+          "source": "replication__analysis",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "audit",
+          "source": "replication__audit",
+          "target": "replication__adjudicate"
+        },
+        {
+          "external": false,
+          "input": "law_extraction",
+          "source": "law_extraction",
+          "target": "reproduction_synthesis"
+        },
+        {
+          "external": false,
+          "input": "replication",
+          "source": "replication",
+          "target": "reproduction_synthesis"
+        }
+      ],
+      "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.",
+      "nodes": [
+        {
+          "chain": [
+            "asta autodiscovery run",
+            "asta autodiscovery experiments"
+          ],
+          "id": "data_driven_discovery",
+          "inputs": [],
+          "kind": "step",
+          "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "chain": [],
+          "id": "law_extraction",
+          "inputs": [
+            "data_driven_discovery"
+          ],
+          "kind": "step",
+          "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        },
+        {
+          "chain": [
+            "asta literature find",
+            "asta papers search",
+            "asta documents",
+            "asta autodiscovery upload"
+          ],
+          "id": "evidence_gathering",
+          "inputs": [
+            "law_extraction"
+          ],
+          "kind": "step",
+          "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "replication",
+          "kind": "group",
+          "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.",
+          "name": "replication",
+          "parent": null,
+          "replan": true
+        },
+        {
+          "chain": [
+            "asta experiment"
+          ],
+          "id": "replication__experiment_design",
+          "inputs": [
+            "law_extraction",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.",
+          "name": "experiment_design",
+          "parent": "replication",
+          "replan": false,
+          "task": "experiment_design"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__analysis",
+          "inputs": [
+            "experiment_design",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.",
+          "name": "analysis",
+          "parent": "replication",
+          "replan": false,
+          "task": "analysis"
+        },
+        {
+          "chain": [
+            "asta analyze-data submit",
+            "asta analyze-data poll"
+          ],
+          "id": "replication__audit",
+          "inputs": [
+            "analysis"
+          ],
+          "kind": "step",
+          "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.",
+          "name": "audit",
+          "parent": "replication",
+          "replan": false,
+          "task": "audit"
+        },
+        {
+          "chain": [],
+          "id": "replication__adjudicate",
+          "inputs": [
+            "experiment_design",
+            "analysis",
+            "audit"
+          ],
+          "kind": "step",
+          "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.",
+          "name": "adjudicate",
+          "parent": "replication",
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "chain": [],
+          "id": "reproduction_synthesis",
+          "inputs": [
+            "law_extraction",
+            "replication"
+          ],
+          "kind": "step",
+          "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.",
+          "name": "reproduction_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "reproduction_synthesis"
+        }
+      ]
+    },
+    "theorizer": {
+      "edges": [
+        {
+          "external": true,
+          "input": "law_extraction",
+          "source": "ext__law_extraction",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": true,
+          "input": "adjudicate",
+          "source": "ext__adjudicate",
+          "target": "evidence_extraction"
+        },
+        {
+          "external": false,
+          "input": "evidence_extraction",
+          "source": "evidence_extraction",
+          "target": "theory_generation__theory_formation"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "data_driven_discovery",
+          "source": "ext__data_driven_discovery",
+          "target": "testability_triage"
+        },
+        {
+          "external": true,
+          "input": "evidence_gathering",
+          "source": "ext__evidence_gathering",
+          "target": "testability_triage"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "novelty_assessment"
+        },
+        {
+          "external": false,
+          "input": "theory_generation",
+          "source": "theory_generation",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "novelty_assessment",
+          "source": "novelty_assessment",
+          "target": "theory_synthesis"
+        },
+        {
+          "external": false,
+          "input": "testability_triage",
+          "source": "testability_triage",
+          "target": "theory_synthesis"
+        }
+      ],
+      "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data in hand can actually test.",
+      "nodes": [
+        {
+          "chain": [
+            "asta generate-theories build-extraction-schema",
+            "asta generate-theories find-and-extract"
+          ],
+          "id": "evidence_extraction",
+          "inputs": [
+            "law_extraction",
+            "adjudicate"
+          ],
+          "kind": "step",
+          "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.",
+          "name": "evidence_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_extraction"
+        },
+        {
+          "id": "theory_generation",
+          "kind": "group",
+          "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.",
+          "name": "theory_generation",
+          "parent": null,
+          "replan": false
+        },
+        {
+          "chain": [
+            "asta generate-theories form-theory"
+          ],
+          "id": "theory_generation__theory_formation",
+          "inputs": [
+            "evidence_extraction"
+          ],
+          "kind": "step",
+          "mission": "Form theories from the shared extraction store under this branch's objective.",
+          "name": "theory_formation",
+          "parent": "theory_generation",
+          "replan": false,
+          "task": "theory_formation"
+        },
+        {
+          "chain": [],
+          "id": "testability_triage",
+          "inputs": [
+            "theory_generation",
+            "data_driven_discovery",
+            "evidence_gathering"
+          ],
+          "kind": "step",
+          "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.",
+          "name": "testability_triage",
+          "parent": null,
+          "replan": false,
+          "task": "testability_triage"
+        },
+        {
+          "chain": [
+            "asta generate-theories evaluate-novelty"
+          ],
+          "id": "novelty_assessment",
+          "inputs": [
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.",
+          "name": "novelty_assessment",
+          "parent": null,
+          "replan": false,
+          "task": "novelty_assessment"
+        },
+        {
+          "chain": [],
+          "id": "theory_synthesis",
+          "inputs": [
+            "theory_generation",
+            "novelty_assessment",
+            "testability_triage"
+          ],
+          "kind": "step",
+          "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.",
+          "name": "theory_synthesis",
+          "parent": null,
+          "replan": false,
+          "task": "theory_synthesis"
+        },
+        {
+          "id": "ext__adjudicate",
+          "kind": "external",
+          "mission": "",
+          "name": "adjudicate",
+          "parent": null,
+          "replan": false,
+          "task": "adjudicate"
+        },
+        {
+          "id": "ext__data_driven_discovery",
+          "kind": "external",
+          "mission": "",
+          "name": "data_driven_discovery",
+          "parent": null,
+          "replan": false,
+          "task": "data_driven_discovery"
+        },
+        {
+          "id": "ext__evidence_gathering",
+          "kind": "external",
+          "mission": "",
+          "name": "evidence_gathering",
+          "parent": null,
+          "replan": false,
+          "task": "evidence_gathering"
+        },
+        {
+          "id": "ext__law_extraction",
+          "kind": "external",
+          "mission": "",
+          "name": "law_extraction",
+          "parent": null,
+          "replan": false,
+          "task": "law_extraction"
+        }
+      ]
+    }
+  },
+  "format_version": 1,
+  "schema_version": 2,
+  "tasks": {
+    "adjudicate": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/adjudicate.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "artifacts"
+        ],
+        "title": "adjudicate",
+        "type": "object"
+      }
+    },
+    "analysis": {
+      "output": {
+        "analysis": "analysis",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "analysis": {
+            "additionalProperties": true,
+            "properties": {
+              "assumptions": {
+                "type": "string"
+              },
+              "code": {
+                "type": "string"
+              },
+              "final_answer": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "final_answer",
+              "assumptions",
+              "code"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/analysis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "analysis": {
+            "$ref": "#/$defs/analysis"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "analysis",
+          "figures",
+          "artifacts"
+        ],
+        "title": "analysis",
+        "type": "object"
+      }
+    },
+    "audit": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "audit_report": "audit_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "audit_report": {
+            "additionalProperties": true,
+            "properties": {
+              "artifacts_found": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "challenges": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "check": {
+                      "type": "string"
+                    },
+                    "concern": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "concern",
+                    "check",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "recommended_adjustment": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "verdict_survives": {
+                "type": "boolean"
+              }
+            },
+            "required": [
+              "subject_id",
+              "challenges",
+              "artifacts_found",
+              "verdict_survives",
+              "recommended_adjustment"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/audit.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "audit_report": {
+            "$ref": "#/$defs/audit_report"
+          }
+        },
+        "required": [
+          "audit_report",
+          "artifacts"
+        ],
+        "title": "audit",
+        "type": "object"
+      }
+    },
+    "cohort_assembly": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "cohort": "cohort",
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "cohort": {
+            "additionalProperties": true,
+            "properties": {
+              "discovery_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "exclusion_criteria": {
+                "type": "string"
+              },
+              "holdout_subset": {
+                "additionalProperties": true,
+                "properties": {
+                  "definition": {
+                    "type": "string"
+                  },
+                  "n": {
+                    "type": "number"
+                  },
+                  "path": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "definition",
+                  "n",
+                  "path"
+                ],
+                "type": "object"
+              },
+              "id": {
+                "type": "string"
+              },
+              "inclusion_criteria": {
+                "type": "string"
+              },
+              "research_question": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source_data_sources": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "research_question",
+              "inclusion_criteria",
+              "exclusion_criteria",
+              "sampling",
+              "source_data_sources",
+              "discovery_subset",
+              "holdout_subset",
+              "run_id"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/cohort_assembly.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "cohort": {
+            "$ref": "#/$defs/cohort"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "cohort",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "cohort_assembly",
+        "type": "object"
+      }
+    },
+    "data_acquisition": {
+      "output": {
+        "acquisitions": [
+          "acquisition"
+        ],
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "acquisition": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "validation_note": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "access_status",
+              "local_path",
+              "dataset_id",
+              "validation_note"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_acquisition.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "acquisitions": {
+            "items": {
+              "$ref": "#/$defs/acquisition"
+            },
+            "type": "array"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "acquisitions",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_acquisition",
+        "type": "object"
+      }
+    },
+    "data_driven_discovery": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/data_driven_discovery.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "datasets",
+          "artifacts"
+        ],
+        "title": "data_driven_discovery",
+        "type": "object"
+      }
+    },
+    "discovery_run": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ],
+        "experiments": [
+          "experiment"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "experiment": {
+            "additionalProperties": true,
+            "properties": {
+              "analysis": {
+                "type": "string"
+              },
+              "experiment_id": {
+                "type": "string"
+              },
+              "hypothesis": {
+                "type": "string"
+              },
+              "status": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "experiment_id",
+              "status",
+              "hypothesis",
+              "analysis"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_run.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          },
+          "experiments": {
+            "items": {
+              "$ref": "#/$defs/experiment"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "experiments",
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "discovery_run",
+        "type": "object"
+      }
+    },
+    "discovery_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "discovery_report": "discovery_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "discovery_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "interpretation": {
+                "type": "string"
+              },
+              "laws": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "deciding_experiment": {
+                      "type": "string"
+                    },
+                    "effect_size_discovery": {
+                      "type": "string"
+                    },
+                    "effect_size_holdout": {
+                      "type": "string"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "surprise": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "surprise",
+                    "outcome",
+                    "deciding_experiment",
+                    "effect_size_discovery",
+                    "effect_size_holdout"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "run_id": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "run_id",
+              "laws",
+              "interpretation",
+              "next_steps",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/discovery_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "discovery_report": {
+            "$ref": "#/$defs/discovery_report"
+          }
+        },
+        "required": [
+          "discovery_report",
+          "artifacts"
+        ],
+        "title": "discovery_synthesis",
+        "type": "object"
+      }
+    },
+    "evidence_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "artifacts"
+        ],
+        "title": "evidence_extraction",
+        "type": "object"
+      }
+    },
+    "evidence_gathering": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "datasets": [
+          "dataset"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "dataset": {
+            "additionalProperties": true,
+            "properties": {
+              "covers_laws": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "definition": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "n": {
+                "type": "number"
+              },
+              "sampling": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "variables": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "id",
+              "definition",
+              "source",
+              "n",
+              "sampling",
+              "variables",
+              "covers_laws"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/evidence_gathering.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "datasets": {
+            "items": {
+              "$ref": "#/$defs/dataset"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "datasets",
+          "artifacts"
+        ],
+        "title": "evidence_gathering",
+        "type": "object"
+      }
+    },
+    "experiment_design": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "experiment_design": "experiment_design"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "experiment_design": {
+            "additionalProperties": true,
+            "properties": {
+              "construct_equivalence": {
+                "enum": [
+                  "equivalent",
+                  "proxy",
+                  "mismatch"
+                ]
+              },
+              "data_gap": {
+                "type": "string"
+              },
+              "experiment_design_query": {
+                "type": "string"
+              },
+              "experiment_name": {
+                "type": "string"
+              },
+              "feasibility": {
+                "enum": [
+                  "feasible",
+                  "proxy_only",
+                  "data_unavailable",
+                  "construct_mismatch"
+                ]
+              },
+              "independent_operationalization": {
+                "type": "string"
+              },
+              "plain_language_description": {
+                "type": "string"
+              },
+              "prespecified": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "experiment_name",
+              "plain_language_description",
+              "source_operationalization",
+              "independent_operationalization",
+              "construct_equivalence",
+              "feasibility",
+              "required_data",
+              "data_gap",
+              "experiment_design_query",
+              "prespecified"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/experiment_design.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "experiment_design": {
+            "$ref": "#/$defs/experiment_design"
+          }
+        },
+        "required": [
+          "experiment_design",
+          "artifacts"
+        ],
+        "title": "experiment_design",
+        "type": "object"
+      }
+    },
+    "final_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "research_report": "research_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "research_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "inference_chain": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "chain": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "claim": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "claim",
+                    "chain"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sub_reports": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "kind": {
+                      "type": "string"
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "report_path": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "kind",
+                    "report_path",
+                    "one_line"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "tensions_and_surprises": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "observation": {
+                      "type": "string"
+                    },
+                    "where": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "observation",
+                    "where",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_highlights": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "claim": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_was_done": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theory_highlights",
+              "inference_chain",
+              "what_was_done",
+              "sub_reports",
+              "tensions_and_surprises",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/final_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "research_report": {
+            "$ref": "#/$defs/research_report"
+          }
+        },
+        "required": [
+          "research_report",
+          "artifacts"
+        ],
+        "title": "final_synthesis",
+        "type": "object"
+      }
+    },
+    "gap_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_gaps_report": "data_gaps_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_gaps_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "arose_in": {
+                      "type": "string"
+                    },
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity",
+                    "arose_in"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "next_steps": {
+                "items": {
+                  "$ref": "#/$defs/next_run_proposal"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "gaps",
+              "next_steps",
+              "figures",
+              "links"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "next_run_proposal": {
+            "additionalProperties": true,
+            "properties": {
+              "data_needed": {
+                "type": "string"
+              },
+              "expected_signature": {
+                "type": "string"
+              },
+              "kind": {
+                "type": "string"
+              },
+              "priority": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              },
+              "tests": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "kind",
+              "title",
+              "tests",
+              "data_needed",
+              "expected_signature",
+              "priority"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/gap_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_gaps_report": {
+            "$ref": "#/$defs/data_gaps_report"
+          }
+        },
+        "required": [
+          "data_gaps_report",
+          "artifacts"
+        ],
+        "title": "gap_synthesis",
+        "type": "object"
+      }
+    },
+    "holdout_replication": {
+      "output": {
+        "adjudication": "adjudication",
+        "artifacts": [
+          "artifact"
+        ],
+        "figures": [
+          "figure"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "adjudication": {
+            "additionalProperties": true,
+            "properties": {
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "prespecified_check": {
+                "type": "string"
+              },
+              "subject_id": {
+                "type": "string"
+              },
+              "subject_kind": {
+                "enum": [
+                  "empirical_law",
+                  "theory",
+                  "hypothesis"
+                ]
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "subject_kind",
+              "subject_id",
+              "outcome",
+              "testability",
+              "effect_size_observed",
+              "prespecified_check",
+              "independence_axes",
+              "data_used",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/holdout_replication.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "adjudication": {
+            "$ref": "#/$defs/adjudication"
+          },
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "figures": {
+            "items": {
+              "$ref": "#/$defs/figure"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "adjudication",
+          "figures",
+          "artifacts"
+        ],
+        "title": "holdout_replication",
+        "type": "object"
+      }
+    },
+    "hypothesis_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypotheses": [
+          "hypothesis"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "hypothesis": {
+            "additionalProperties": true,
+            "properties": {
+              "falsifiable_prediction": {
+                "type": "string"
+              },
+              "grounds": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "rationale": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "rationale",
+              "falsifiable_prediction",
+              "grounds"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypotheses": {
+            "items": {
+              "$ref": "#/$defs/hypothesis"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "hypotheses",
+          "artifacts"
+        ],
+        "title": "hypothesis_formation",
+        "type": "object"
+      }
+    },
+    "hypothesis_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "hypothesis_report": "hypothesis_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "hypothesis_report": {
+            "additionalProperties": true,
+            "properties": {
+              "answer": {
+                "type": "string"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "hypothesis_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "hypothesis_id",
+                    "statement",
+                    "outcome",
+                    "effect_size_observed",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_questions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "question": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "question",
+              "ledger",
+              "answer",
+              "open_questions",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "hypothesis_report": {
+            "$ref": "#/$defs/hypothesis_report"
+          }
+        },
+        "required": [
+          "hypothesis_report",
+          "artifacts"
+        ],
+        "title": "hypothesis_synthesis",
+        "type": "object"
+      }
+    },
+    "law_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "empirical_laws": [
+          "empirical_law"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "empirical_law": {
+            "additionalProperties": true,
+            "properties": {
+              "construct": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "grouping_rationale": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "mcts_provenance": {
+                "additionalProperties": true,
+                "properties": {
+                  "is_surprising": {
+                    "type": "boolean"
+                  },
+                  "posterior_belief": {
+                    "type": "object"
+                  },
+                  "prior_belief": {
+                    "type": "object"
+                  },
+                  "surprise": {
+                    "type": "number"
+                  }
+                },
+                "required": [
+                  "surprise",
+                  "is_surprising",
+                  "prior_belief",
+                  "posterior_belief"
+                ],
+                "type": "object"
+              },
+              "source_node": {
+                "type": "string"
+              },
+              "source_operationalization": {
+                "type": "string"
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "statement",
+              "construct",
+              "source_operationalization",
+              "source_node",
+              "effect_size_source",
+              "grouping_rationale"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/law_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "empirical_laws": {
+            "items": {
+              "$ref": "#/$defs/empirical_law"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "empirical_laws",
+          "artifacts"
+        ],
+        "title": "law_extraction",
+        "type": "object"
+      }
+    },
+    "literature_review": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "literature_review": "literature_review"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "literature_review": {
+            "additionalProperties": true,
+            "properties": {
+              "citations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "corpus_id": {
+                      "type": "number"
+                    },
+                    "id": {
+                      "type": "string"
+                    },
+                    "relevance": {
+                      "type": "string"
+                    },
+                    "title": {
+                      "type": "string"
+                    },
+                    "url": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "id",
+                    "corpus_id",
+                    "title",
+                    "url",
+                    "relevance"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "key_findings": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "text": {
+                      "type": "string"
+                    },
+                    "uuids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    }
+                  },
+                  "required": [
+                    "text",
+                    "uuids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "open_gaps": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "summary": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "summary",
+              "key_findings",
+              "open_gaps",
+              "citations"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/literature_review.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "literature_review": {
+            "$ref": "#/$defs/literature_review"
+          }
+        },
+        "required": [
+          "literature_review",
+          "artifacts"
+        ],
+        "title": "literature_review",
+        "type": "object"
+      }
+    },
+    "novelty_assessment": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_evaluations": [
+          "theory_evaluation"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_evaluation": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "overall_support": {
+                "enum": [
+                  "supports",
+                  "mixed",
+                  "contradicts",
+                  "inconclusive"
+                ]
+              },
+              "overall_support_raw": {
+                "type": "string"
+              },
+              "statement_evaluations": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "explanation": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "statement_index": {
+                      "type": "number"
+                    }
+                  },
+                  "required": [
+                    "statement_index",
+                    "novelty",
+                    "explanation"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "theory_id",
+              "novelty",
+              "overall_support",
+              "explanation",
+              "statement_evaluations"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/novelty_assessment.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_evaluations": {
+            "items": {
+              "$ref": "#/$defs/theory_evaluation"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theory_evaluations",
+          "artifacts"
+        ],
+        "title": "novelty_assessment",
+        "type": "object"
+      }
+    },
+    "provenance_extraction": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "extracted_data": "extracted_data",
+        "source_access": [
+          "source_access"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "extracted_data": {
+            "additionalProperties": true,
+            "properties": {
+              "extraction_schema_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "rows": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "brief_description": {
+                      "type": "string"
+                    },
+                    "citation_title": {
+                      "type": "string"
+                    },
+                    "name_full": {
+                      "type": "string"
+                    },
+                    "name_short": {
+                      "type": "string"
+                    },
+                    "uuid": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "name_short",
+                    "name_full",
+                    "brief_description",
+                    "citation_title",
+                    "uuid"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "run_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "run_id",
+              "paper_id",
+              "extraction_schema_id",
+              "rows"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "source_access": {
+            "additionalProperties": true,
+            "properties": {
+              "data_availability": {
+                "type": "string"
+              },
+              "data_source_id": {
+                "type": "string"
+              },
+              "identifier": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "data_source_id",
+              "data_availability",
+              "repository",
+              "identifier"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_extraction.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "extracted_data": {
+            "$ref": "#/$defs/extracted_data"
+          },
+          "source_access": {
+            "items": {
+              "$ref": "#/$defs/source_access"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "extracted_data",
+          "source_access",
+          "artifacts"
+        ],
+        "title": "provenance_extraction",
+        "type": "object"
+      }
+    },
+    "provenance_search": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "data_sources": [
+          "data_source"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "data_source": {
+            "additionalProperties": true,
+            "properties": {
+              "dataset_id": {
+                "type": "string"
+              },
+              "id": {
+                "type": "string"
+              },
+              "paper_id": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "dataset_id",
+              "paper_id",
+              "paper_title",
+              "paper_url"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_search.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "data_sources": {
+            "items": {
+              "$ref": "#/$defs/data_source"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "data_sources",
+          "artifacts"
+        ],
+        "title": "provenance_search",
+        "type": "object"
+      }
+    },
+    "provenance_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "provenance_report": "provenance_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "provenance_report": {
+            "additionalProperties": true,
+            "properties": {
+              "acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "not_acquired": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "sources": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "access_status": {
+                      "enum": [
+                        "acquired",
+                        "open_unfetched",
+                        "restricted",
+                        "not_found"
+                      ]
+                    },
+                    "dataset_id": {
+                      "type": "string"
+                    },
+                    "local_path": {
+                      "type": "string"
+                    },
+                    "paper_title": {
+                      "type": "string"
+                    },
+                    "paper_url": {
+                      "type": "string"
+                    },
+                    "repository": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "dataset_id",
+                    "paper_title",
+                    "paper_url",
+                    "repository",
+                    "access_status",
+                    "local_path"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "sources",
+              "method_note",
+              "acquired",
+              "not_acquired",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/provenance_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "provenance_report": {
+            "$ref": "#/$defs/provenance_report"
+          }
+        },
+        "required": [
+          "provenance_report",
+          "artifacts"
+        ],
+        "title": "provenance_synthesis",
+        "type": "object"
+      }
+    },
+    "reproduction_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "reproduction_report": "reproduction_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "reproduction_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "laws_ledger": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "effect_size_observed": {
+                      "type": "string"
+                    },
+                    "effect_size_source": {
+                      "type": "string"
+                    },
+                    "evidence": {
+                      "type": "string"
+                    },
+                    "independence_axes": {
+                      "items": {
+                        "enum": [
+                          "region",
+                          "instrument",
+                          "method",
+                          "construct",
+                          "temporal",
+                          "population"
+                        ]
+                      },
+                      "type": "array"
+                    },
+                    "law_id": {
+                      "type": "string"
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "statement": {
+                      "type": "string"
+                    },
+                    "testability": {
+                      "enum": [
+                        "tested",
+                        "proxy_only",
+                        "untestable"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "law_id",
+                    "statement",
+                    "outcome",
+                    "testability",
+                    "effect_size_source",
+                    "effect_size_observed",
+                    "independence_axes",
+                    "evidence"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "method_note": {
+                "type": "string"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_failed_or_untestable": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_held": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "method_note",
+              "laws_ledger",
+              "what_held",
+              "what_failed_or_untestable",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/reproduction_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "reproduction_report": {
+            "$ref": "#/$defs/reproduction_report"
+          }
+        },
+        "required": [
+          "reproduction_report",
+          "artifacts"
+        ],
+        "title": "reproduction_synthesis",
+        "type": "object"
+      }
+    },
+    "testability_triage": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "testability_triage": "testability_triage"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "testability_triage": {
+            "additionalProperties": true,
+            "properties": {
+              "assessments": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "available_data": {
+                      "type": "string"
+                    },
+                    "gap": {
+                      "type": "string"
+                    },
+                    "proposed_test": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "metric": {
+                          "type": "string"
+                        },
+                        "success_threshold": {
+                          "type": "string"
+                        },
+                        "test": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "test",
+                        "metric",
+                        "success_threshold"
+                      ],
+                      "type": "object"
+                    },
+                    "required_data": {
+                      "type": "string"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "testable_now",
+                    "available_data",
+                    "required_data",
+                    "proposed_test",
+                    "gap"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "testable_theory_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "assessments",
+              "testable_theory_ids"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/testability_triage.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "testability_triage": {
+            "$ref": "#/$defs/testability_triage"
+          }
+        },
+        "required": [
+          "testability_triage",
+          "artifacts"
+        ],
+        "title": "testability_triage",
+        "type": "object"
+      }
+    },
+    "theory_formation": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theories": [
+          "theory"
+        ]
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory": {
+            "additionalProperties": true,
+            "properties": {
+              "components": {
+                "additionalProperties": true,
+                "properties": {
+                  "generation_objective": {
+                    "type": "string"
+                  },
+                  "new_predictions_likely": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "new_predictions_unknown": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statements": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "conflicting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "statement_name": {
+                          "type": "string"
+                        },
+                        "supporting_evidence": {
+                          "items": {
+                            "additionalProperties": true,
+                            "properties": {
+                              "text": {
+                                "type": "string"
+                              },
+                              "uuids": {
+                                "items": {
+                                  "type": "string"
+                                },
+                                "type": "array"
+                              }
+                            },
+                            "required": [
+                              "text",
+                              "uuids"
+                            ],
+                            "type": "object"
+                          },
+                          "type": "array"
+                        },
+                        "theory_statement": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "statement_name",
+                        "theory_statement",
+                        "supporting_evidence",
+                        "conflicting_evidence"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "unaccounted_for": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "generation_objective",
+                  "theory_statements",
+                  "new_predictions_likely",
+                  "new_predictions_unknown",
+                  "unaccounted_for"
+                ],
+                "type": "object"
+              },
+              "description": {
+                "type": "string"
+              },
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "id": {
+                "type": "string"
+              },
+              "name": {
+                "type": "string"
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "theory_query": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "name",
+              "description",
+              "theory_query",
+              "objective",
+              "grounds_law_ids",
+              "supporting_evidence_ids",
+              "components"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_formation.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theories": {
+            "items": {
+              "$ref": "#/$defs/theory"
+            },
+            "type": "array"
+          }
+        },
+        "required": [
+          "theories",
+          "artifacts"
+        ],
+        "title": "theory_formation",
+        "type": "object"
+      }
+    },
+    "theory_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "theory_report": "theory_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "theory_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "mechanism": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "grounded_in": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  },
+                  "statement": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "statement",
+                  "grounded_in",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "new_predictions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "novelty_summary": {
+                "type": "string"
+              },
+              "open_threads": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "theories": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "grounds_law_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "name": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "objective": {
+                      "enum": [
+                        "accuracy_focused",
+                        "novelty_focused"
+                      ]
+                    },
+                    "one_line": {
+                      "type": "string"
+                    },
+                    "supporting_evidence_ids": {
+                      "items": {
+                        "type": "string"
+                      },
+                      "type": "array"
+                    },
+                    "testable_now": {
+                      "type": "boolean"
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "name",
+                    "objective",
+                    "one_line",
+                    "grounds_law_ids",
+                    "novelty",
+                    "testable_now",
+                    "supporting_evidence_ids"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "title": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "mechanism",
+              "theories",
+              "novelty_summary",
+              "new_predictions",
+              "open_threads",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/theory_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "theory_report": {
+            "$ref": "#/$defs/theory_report"
+          }
+        },
+        "required": [
+          "theory_report",
+          "artifacts"
+        ],
+        "title": "theory_synthesis",
+        "type": "object"
+      }
+    },
+    "verification_synthesis": {
+      "output": {
+        "artifacts": [
+          "artifact"
+        ],
+        "verification_report": "verification_report"
+      },
+      "schema": {
+        "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+        "$defs": {
+          "artifact": {
+            "additionalProperties": true,
+            "properties": {
+              "artifactId": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "extensions": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "metadata": {
+                "type": "object"
+              },
+              "name": {
+                "type": "string"
+              },
+              "parts": {
+                "items": {
+                  "$ref": "#/$defs/part"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "artifactId",
+              "name",
+              "description",
+              "parts"
+            ],
+            "type": "object"
+          },
+          "figure": {
+            "additionalProperties": true,
+            "properties": {
+              "caption": {
+                "type": "string"
+              },
+              "image": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "caption",
+              "image"
+            ],
+            "type": "object"
+          },
+          "part": {
+            "additionalProperties": true,
+            "properties": {
+              "kind": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object"
+              }
+            },
+            "required": [
+              "kind"
+            ],
+            "type": "object"
+          },
+          "verification_report": {
+            "additionalProperties": true,
+            "properties": {
+              "figures": {
+                "items": {
+                  "$ref": "#/$defs/figure"
+                },
+                "type": "array"
+              },
+              "gaps": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "blocks": {
+                      "type": "string"
+                    },
+                    "item": {
+                      "type": "string"
+                    },
+                    "missing_data": {
+                      "type": "string"
+                    },
+                    "severity": {
+                      "enum": [
+                        "high",
+                        "medium",
+                        "low"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item",
+                    "missing_data",
+                    "blocks",
+                    "severity"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "headline": {
+                "type": "string"
+              },
+              "links": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "label": {
+                      "type": "string"
+                    },
+                    "ref": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "label",
+                    "ref"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "novelty_by_verification": {
+                "items": {
+                  "additionalProperties": true,
+                  "properties": {
+                    "audit_survived": {
+                      "type": "boolean"
+                    },
+                    "claim": {
+                      "type": "string"
+                    },
+                    "data_used": {
+                      "type": "string"
+                    },
+                    "effect_size": {
+                      "type": "string"
+                    },
+                    "novelty": {
+                      "enum": [
+                        "established",
+                        "derivable",
+                        "genuinely_new"
+                      ]
+                    },
+                    "outcome": {
+                      "enum": [
+                        "held",
+                        "partial",
+                        "failed",
+                        "underpowered",
+                        "n/a"
+                      ]
+                    },
+                    "theory_id": {
+                      "type": "string"
+                    }
+                  },
+                  "required": [
+                    "theory_id",
+                    "claim",
+                    "novelty",
+                    "outcome",
+                    "effect_size",
+                    "data_used",
+                    "audit_survived"
+                  ],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "report_path": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "what_could_not_be_tested": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "what_was_tested": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "report_path",
+              "title",
+              "headline",
+              "novelty_by_verification",
+              "what_was_tested",
+              "what_could_not_be_tested",
+              "figures",
+              "gaps",
+              "links"
+            ],
+            "type": "object"
+          }
+        },
+        "$id": "asta-research-step/verification_synthesis.schema.json",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "additionalProperties": false,
+        "properties": {
+          "artifacts": {
+            "items": {
+              "$ref": "#/$defs/artifact"
+            },
+            "type": "array"
+          },
+          "verification_report": {
+            "$ref": "#/$defs/verification_report"
+          }
+        },
+        "required": [
+          "verification_report",
+          "artifacts"
+        ],
+        "title": "verification_synthesis",
+        "type": "object"
+      }
+    }
+  }
+}
diff --git a/skills/research-step/assets/compiled/gap_synthesis.schema.json b/skills/research-step/assets/compiled/gap_synthesis.schema.json
new file mode 100644
index 0000000..760fbb5
--- /dev/null
+++ b/skills/research-step/assets/compiled/gap_synthesis.schema.json
@@ -0,0 +1,221 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_gaps_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "arose_in": {
+                "type": "string"
+              },
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity",
+              "arose_in"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "next_steps": {
+          "items": {
+            "$ref": "#/$defs/next_run_proposal"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "gaps",
+        "next_steps",
+        "figures",
+        "links"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "next_run_proposal": {
+      "additionalProperties": true,
+      "properties": {
+        "data_needed": {
+          "type": "string"
+        },
+        "expected_signature": {
+          "type": "string"
+        },
+        "kind": {
+          "type": "string"
+        },
+        "priority": {
+          "enum": [
+            "high",
+            "medium",
+            "low"
+          ]
+        },
+        "tests": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "kind",
+        "title",
+        "tests",
+        "data_needed",
+        "expected_signature",
+        "priority"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/gap_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_gaps_report": {
+      "$ref": "#/$defs/data_gaps_report"
+    }
+  },
+  "required": [
+    "data_gaps_report",
+    "artifacts"
+  ],
+  "title": "gap_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/holdout_replication.schema.json b/skills/research-step/assets/compiled/holdout_replication.schema.json
new file mode 100644
index 0000000..9d18252
--- /dev/null
+++ b/skills/research-step/assets/compiled/holdout_replication.schema.json
@@ -0,0 +1,167 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "adjudication": {
+      "additionalProperties": true,
+      "properties": {
+        "data_used": {
+          "type": "string"
+        },
+        "effect_size_observed": {
+          "type": "string"
+        },
+        "evidence": {
+          "type": "string"
+        },
+        "independence_axes": {
+          "items": {
+            "enum": [
+              "region",
+              "instrument",
+              "method",
+              "construct",
+              "temporal",
+              "population"
+            ]
+          },
+          "type": "array"
+        },
+        "outcome": {
+          "enum": [
+            "held",
+            "partial",
+            "failed",
+            "underpowered",
+            "n/a"
+          ]
+        },
+        "prespecified_check": {
+          "type": "string"
+        },
+        "subject_id": {
+          "type": "string"
+        },
+        "subject_kind": {
+          "enum": [
+            "empirical_law",
+            "theory",
+            "hypothesis"
+          ]
+        },
+        "testability": {
+          "enum": [
+            "tested",
+            "proxy_only",
+            "untestable"
+          ]
+        }
+      },
+      "required": [
+        "subject_kind",
+        "subject_id",
+        "outcome",
+        "testability",
+        "effect_size_observed",
+        "prespecified_check",
+        "independence_axes",
+        "data_used",
+        "evidence"
+      ],
+      "type": "object"
+    },
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/holdout_replication.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "adjudication": {
+      "$ref": "#/$defs/adjudication"
+    },
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "figures": {
+      "items": {
+        "$ref": "#/$defs/figure"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "adjudication",
+    "figures",
+    "artifacts"
+  ],
+  "title": "holdout_replication",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/skills/research-step/assets/compiled/hypothesis_formation.schema.json
new file mode 100644
index 0000000..694d94f
--- /dev/null
+++ b/skills/research-step/assets/compiled/hypothesis_formation.schema.json
@@ -0,0 +1,126 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "hypothesis": {
+      "additionalProperties": true,
+      "properties": {
+        "falsifiable_prediction": {
+          "type": "string"
+        },
+        "grounds": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "rationale": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "rationale",
+        "falsifiable_prediction",
+        "grounds"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypotheses": {
+      "items": {
+        "$ref": "#/$defs/hypothesis"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "hypotheses",
+    "artifacts"
+  ],
+  "title": "hypothesis_formation",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
new file mode 100644
index 0000000..b2fe767
--- /dev/null
+++ b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json
@@ -0,0 +1,224 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "hypothesis_report": {
+      "additionalProperties": true,
+      "properties": {
+        "answer": {
+          "type": "string"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "hypothesis_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "hypothesis_id",
+              "statement",
+              "outcome",
+              "effect_size_observed",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_questions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "question": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "question",
+        "ledger",
+        "answer",
+        "open_questions",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/hypothesis_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "hypothesis_report": {
+      "$ref": "#/$defs/hypothesis_report"
+    }
+  },
+  "required": [
+    "hypothesis_report",
+    "artifacts"
+  ],
+  "title": "hypothesis_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/law_extraction.schema.json b/skills/research-step/assets/compiled/law_extraction.schema.json
new file mode 100644
index 0000000..7b3e1fc
--- /dev/null
+++ b/skills/research-step/assets/compiled/law_extraction.schema.json
@@ -0,0 +1,139 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "empirical_law": {
+      "additionalProperties": true,
+      "properties": {
+        "construct": {
+          "type": "string"
+        },
+        "effect_size_source": {
+          "type": "string"
+        },
+        "grouping_rationale": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "mcts_provenance": {
+          "additionalProperties": true,
+          "properties": {
+            "is_surprising": {
+              "type": "boolean"
+            },
+            "posterior_belief": {
+              "type": "object"
+            },
+            "prior_belief": {
+              "type": "object"
+            },
+            "surprise": {
+              "type": "number"
+            }
+          },
+          "required": [
+            "surprise",
+            "is_surprising",
+            "prior_belief",
+            "posterior_belief"
+          ],
+          "type": "object"
+        },
+        "source_node": {
+          "type": "string"
+        },
+        "source_operationalization": {
+          "type": "string"
+        },
+        "statement": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "statement",
+        "construct",
+        "source_operationalization",
+        "source_node",
+        "effect_size_source",
+        "grouping_rationale"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/law_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "empirical_laws": {
+      "items": {
+        "$ref": "#/$defs/empirical_law"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "empirical_laws",
+    "artifacts"
+  ],
+  "title": "law_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/literature_review.schema.json b/skills/research-step/assets/compiled/literature_review.schema.json
new file mode 100644
index 0000000..14df7b7
--- /dev/null
+++ b/skills/research-step/assets/compiled/literature_review.schema.json
@@ -0,0 +1,150 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "literature_review": {
+      "additionalProperties": true,
+      "properties": {
+        "citations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "corpus_id": {
+                "type": "number"
+              },
+              "id": {
+                "type": "string"
+              },
+              "relevance": {
+                "type": "string"
+              },
+              "title": {
+                "type": "string"
+              },
+              "url": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "id",
+              "corpus_id",
+              "title",
+              "url",
+              "relevance"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "key_findings": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "text": {
+                "type": "string"
+              },
+              "uuids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "text",
+              "uuids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "open_gaps": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "summary": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "summary",
+        "key_findings",
+        "open_gaps",
+        "citations"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/literature_review.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "literature_review": {
+      "$ref": "#/$defs/literature_review"
+    }
+  },
+  "required": [
+    "literature_review",
+    "artifacts"
+  ],
+  "title": "literature_review",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/novelty_assessment.schema.json b/skills/research-step/assets/compiled/novelty_assessment.schema.json
new file mode 100644
index 0000000..729f9fe
--- /dev/null
+++ b/skills/research-step/assets/compiled/novelty_assessment.schema.json
@@ -0,0 +1,147 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_evaluation": {
+      "additionalProperties": true,
+      "properties": {
+        "explanation": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "novelty": {
+          "enum": [
+            "established",
+            "derivable",
+            "genuinely_new"
+          ]
+        },
+        "overall_support": {
+          "enum": [
+            "supports",
+            "mixed",
+            "contradicts",
+            "inconclusive"
+          ]
+        },
+        "overall_support_raw": {
+          "type": "string"
+        },
+        "statement_evaluations": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "explanation": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "statement_index": {
+                "type": "number"
+              }
+            },
+            "required": [
+              "statement_index",
+              "novelty",
+              "explanation"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "theory_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "theory_id",
+        "novelty",
+        "overall_support",
+        "explanation",
+        "statement_evaluations"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/novelty_assessment.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_evaluations": {
+      "items": {
+        "$ref": "#/$defs/theory_evaluation"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theory_evaluations",
+    "artifacts"
+  ],
+  "title": "novelty_assessment",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_extraction.schema.json b/skills/research-step/assets/compiled/provenance_extraction.schema.json
new file mode 100644
index 0000000..2bd4ea8
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_extraction.schema.json
@@ -0,0 +1,163 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "extracted_data": {
+      "additionalProperties": true,
+      "properties": {
+        "extraction_schema_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "rows": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "brief_description": {
+                "type": "string"
+              },
+              "citation_title": {
+                "type": "string"
+              },
+              "name_full": {
+                "type": "string"
+              },
+              "name_short": {
+                "type": "string"
+              },
+              "uuid": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name_short",
+              "name_full",
+              "brief_description",
+              "citation_title",
+              "uuid"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "run_id": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "run_id",
+        "paper_id",
+        "extraction_schema_id",
+        "rows"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "source_access": {
+      "additionalProperties": true,
+      "properties": {
+        "data_availability": {
+          "type": "string"
+        },
+        "data_source_id": {
+          "type": "string"
+        },
+        "identifier": {
+          "type": "string"
+        },
+        "repository": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "data_source_id",
+        "data_availability",
+        "repository",
+        "identifier"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_extraction.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "extracted_data": {
+      "$ref": "#/$defs/extracted_data"
+    },
+    "source_access": {
+      "items": {
+        "$ref": "#/$defs/source_access"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "extracted_data",
+    "source_access",
+    "artifacts"
+  ],
+  "title": "provenance_extraction",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_search.schema.json b/skills/research-step/assets/compiled/provenance_search.schema.json
new file mode 100644
index 0000000..8a924d9
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_search.schema.json
@@ -0,0 +1,107 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "data_source": {
+      "additionalProperties": true,
+      "properties": {
+        "dataset_id": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "paper_id": {
+          "type": "string"
+        },
+        "paper_title": {
+          "type": "string"
+        },
+        "paper_url": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "dataset_id",
+        "paper_id",
+        "paper_title",
+        "paper_url"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_search.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "data_sources": {
+      "items": {
+        "$ref": "#/$defs/data_source"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "data_sources",
+    "artifacts"
+  ],
+  "title": "provenance_search",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/skills/research-step/assets/compiled/provenance_synthesis.schema.json
new file mode 100644
index 0000000..0d43a6f
--- /dev/null
+++ b/skills/research-step/assets/compiled/provenance_synthesis.schema.json
@@ -0,0 +1,230 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "provenance_report": {
+      "additionalProperties": true,
+      "properties": {
+        "acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "not_acquired": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "sources": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "access_status": {
+                "enum": [
+                  "acquired",
+                  "open_unfetched",
+                  "restricted",
+                  "not_found"
+                ]
+              },
+              "dataset_id": {
+                "type": "string"
+              },
+              "local_path": {
+                "type": "string"
+              },
+              "paper_title": {
+                "type": "string"
+              },
+              "paper_url": {
+                "type": "string"
+              },
+              "repository": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "dataset_id",
+              "paper_title",
+              "paper_url",
+              "repository",
+              "access_status",
+              "local_path"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "sources",
+        "method_note",
+        "acquired",
+        "not_acquired",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/provenance_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "provenance_report": {
+      "$ref": "#/$defs/provenance_report"
+    }
+  },
+  "required": [
+    "provenance_report",
+    "artifacts"
+  ],
+  "title": "provenance_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
new file mode 100644
index 0000000..570e076
--- /dev/null
+++ b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json
@@ -0,0 +1,253 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "reproduction_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "laws_ledger": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "effect_size_observed": {
+                "type": "string"
+              },
+              "effect_size_source": {
+                "type": "string"
+              },
+              "evidence": {
+                "type": "string"
+              },
+              "independence_axes": {
+                "items": {
+                  "enum": [
+                    "region",
+                    "instrument",
+                    "method",
+                    "construct",
+                    "temporal",
+                    "population"
+                  ]
+                },
+                "type": "array"
+              },
+              "law_id": {
+                "type": "string"
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "statement": {
+                "type": "string"
+              },
+              "testability": {
+                "enum": [
+                  "tested",
+                  "proxy_only",
+                  "untestable"
+                ]
+              }
+            },
+            "required": [
+              "law_id",
+              "statement",
+              "outcome",
+              "testability",
+              "effect_size_source",
+              "effect_size_observed",
+              "independence_axes",
+              "evidence"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "method_note": {
+          "type": "string"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_failed_or_untestable": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_held": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "method_note",
+        "laws_ledger",
+        "what_held",
+        "what_failed_or_untestable",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/reproduction_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "reproduction_report": {
+      "$ref": "#/$defs/reproduction_report"
+    }
+  },
+  "required": [
+    "reproduction_report",
+    "artifacts"
+  ],
+  "title": "reproduction_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/testability_triage.schema.json b/skills/research-step/assets/compiled/testability_triage.schema.json
new file mode 100644
index 0000000..8968920
--- /dev/null
+++ b/skills/research-step/assets/compiled/testability_triage.schema.json
@@ -0,0 +1,144 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "testability_triage": {
+      "additionalProperties": true,
+      "properties": {
+        "assessments": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "available_data": {
+                "type": "string"
+              },
+              "gap": {
+                "type": "string"
+              },
+              "proposed_test": {
+                "additionalProperties": true,
+                "properties": {
+                  "metric": {
+                    "type": "string"
+                  },
+                  "success_threshold": {
+                    "type": "string"
+                  },
+                  "test": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "test",
+                  "metric",
+                  "success_threshold"
+                ],
+                "type": "object"
+              },
+              "required_data": {
+                "type": "string"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "testable_now",
+              "available_data",
+              "required_data",
+              "proposed_test",
+              "gap"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "testable_theory_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "assessments",
+        "testable_theory_ids"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/testability_triage.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "testability_triage": {
+      "$ref": "#/$defs/testability_triage"
+    }
+  },
+  "required": [
+    "testability_triage",
+    "artifacts"
+  ],
+  "title": "testability_triage",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/theory_formation.schema.json b/skills/research-step/assets/compiled/theory_formation.schema.json
new file mode 100644
index 0000000..7373cec
--- /dev/null
+++ b/skills/research-step/assets/compiled/theory_formation.schema.json
@@ -0,0 +1,240 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory": {
+      "additionalProperties": true,
+      "properties": {
+        "components": {
+          "additionalProperties": true,
+          "properties": {
+            "generation_objective": {
+              "type": "string"
+            },
+            "new_predictions_likely": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "new_predictions_unknown": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "theory_statements": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "conflicting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "statement_name": {
+                    "type": "string"
+                  },
+                  "supporting_evidence": {
+                    "items": {
+                      "additionalProperties": true,
+                      "properties": {
+                        "text": {
+                          "type": "string"
+                        },
+                        "uuids": {
+                          "items": {
+                            "type": "string"
+                          },
+                          "type": "array"
+                        }
+                      },
+                      "required": [
+                        "text",
+                        "uuids"
+                      ],
+                      "type": "object"
+                    },
+                    "type": "array"
+                  },
+                  "theory_statement": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "statement_name",
+                  "theory_statement",
+                  "supporting_evidence",
+                  "conflicting_evidence"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            },
+            "unaccounted_for": {
+              "items": {
+                "additionalProperties": true,
+                "properties": {
+                  "text": {
+                    "type": "string"
+                  },
+                  "uuids": {
+                    "items": {
+                      "type": "string"
+                    },
+                    "type": "array"
+                  }
+                },
+                "required": [
+                  "text",
+                  "uuids"
+                ],
+                "type": "object"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "generation_objective",
+            "theory_statements",
+            "new_predictions_likely",
+            "new_predictions_unknown",
+            "unaccounted_for"
+          ],
+          "type": "object"
+        },
+        "description": {
+          "type": "string"
+        },
+        "grounds_law_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "objective": {
+          "enum": [
+            "accuracy_focused",
+            "novelty_focused"
+          ]
+        },
+        "supporting_evidence_ids": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "theory_query": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "name",
+        "description",
+        "theory_query",
+        "objective",
+        "grounds_law_ids",
+        "supporting_evidence_ids",
+        "components"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_formation.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theories": {
+      "items": {
+        "$ref": "#/$defs/theory"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "theories",
+    "artifacts"
+  ],
+  "title": "theory_formation",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/theory_synthesis.schema.json b/skills/research-step/assets/compiled/theory_synthesis.schema.json
new file mode 100644
index 0000000..dd2768e
--- /dev/null
+++ b/skills/research-step/assets/compiled/theory_synthesis.schema.json
@@ -0,0 +1,280 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "theory_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "mechanism": {
+          "additionalProperties": true,
+          "properties": {
+            "conflicting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "grounded_in": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            },
+            "statement": {
+              "type": "string"
+            },
+            "supporting_evidence": {
+              "items": {
+                "type": "string"
+              },
+              "type": "array"
+            }
+          },
+          "required": [
+            "statement",
+            "grounded_in",
+            "supporting_evidence",
+            "conflicting_evidence"
+          ],
+          "type": "object"
+        },
+        "new_predictions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "novelty_summary": {
+          "type": "string"
+        },
+        "open_threads": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "theories": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "grounds_law_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "name": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "objective": {
+                "enum": [
+                  "accuracy_focused",
+                  "novelty_focused"
+                ]
+              },
+              "one_line": {
+                "type": "string"
+              },
+              "supporting_evidence_ids": {
+                "items": {
+                  "type": "string"
+                },
+                "type": "array"
+              },
+              "testable_now": {
+                "type": "boolean"
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "name",
+              "objective",
+              "one_line",
+              "grounds_law_ids",
+              "novelty",
+              "testable_now",
+              "supporting_evidence_ids"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "mechanism",
+        "theories",
+        "novelty_summary",
+        "new_predictions",
+        "open_threads",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/theory_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "theory_report": {
+      "$ref": "#/$defs/theory_report"
+    }
+  },
+  "required": [
+    "theory_report",
+    "artifacts"
+  ],
+  "title": "theory_synthesis",
+  "type": "object"
+}
diff --git a/skills/research-step/assets/compiled/verification_synthesis.schema.json b/skills/research-step/assets/compiled/verification_synthesis.schema.json
new file mode 100644
index 0000000..8d1a639
--- /dev/null
+++ b/skills/research-step/assets/compiled/verification_synthesis.schema.json
@@ -0,0 +1,232 @@
+{
+  "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit",
+  "$defs": {
+    "artifact": {
+      "additionalProperties": true,
+      "properties": {
+        "artifactId": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "extensions": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parts": {
+          "items": {
+            "$ref": "#/$defs/part"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "artifactId",
+        "name",
+        "description",
+        "parts"
+      ],
+      "type": "object"
+    },
+    "figure": {
+      "additionalProperties": true,
+      "properties": {
+        "caption": {
+          "type": "string"
+        },
+        "image": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "caption",
+        "image"
+      ],
+      "type": "object"
+    },
+    "part": {
+      "additionalProperties": true,
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "metadata": {
+          "type": "object"
+        }
+      },
+      "required": [
+        "kind"
+      ],
+      "type": "object"
+    },
+    "verification_report": {
+      "additionalProperties": true,
+      "properties": {
+        "figures": {
+          "items": {
+            "$ref": "#/$defs/figure"
+          },
+          "type": "array"
+        },
+        "gaps": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "blocks": {
+                "type": "string"
+              },
+              "item": {
+                "type": "string"
+              },
+              "missing_data": {
+                "type": "string"
+              },
+              "severity": {
+                "enum": [
+                  "high",
+                  "medium",
+                  "low"
+                ]
+              }
+            },
+            "required": [
+              "item",
+              "missing_data",
+              "blocks",
+              "severity"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "headline": {
+          "type": "string"
+        },
+        "links": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "label": {
+                "type": "string"
+              },
+              "ref": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "label",
+              "ref"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "novelty_by_verification": {
+          "items": {
+            "additionalProperties": true,
+            "properties": {
+              "audit_survived": {
+                "type": "boolean"
+              },
+              "claim": {
+                "type": "string"
+              },
+              "data_used": {
+                "type": "string"
+              },
+              "effect_size": {
+                "type": "string"
+              },
+              "novelty": {
+                "enum": [
+                  "established",
+                  "derivable",
+                  "genuinely_new"
+                ]
+              },
+              "outcome": {
+                "enum": [
+                  "held",
+                  "partial",
+                  "failed",
+                  "underpowered",
+                  "n/a"
+                ]
+              },
+              "theory_id": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "theory_id",
+              "claim",
+              "novelty",
+              "outcome",
+              "effect_size",
+              "data_used",
+              "audit_survived"
+            ],
+            "type": "object"
+          },
+          "type": "array"
+        },
+        "report_path": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        },
+        "what_could_not_be_tested": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "what_was_tested": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "report_path",
+        "title",
+        "headline",
+        "novelty_by_verification",
+        "what_was_tested",
+        "what_could_not_be_tested",
+        "figures",
+        "gaps",
+        "links"
+      ],
+      "type": "object"
+    }
+  },
+  "$id": "asta-research-step/verification_synthesis.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "additionalProperties": false,
+  "properties": {
+    "artifacts": {
+      "items": {
+        "$ref": "#/$defs/artifact"
+      },
+      "type": "array"
+    },
+    "verification_report": {
+      "$ref": "#/$defs/verification_report"
+    }
+  },
+  "required": [
+    "verification_report",
+    "artifacts"
+  ],
+  "title": "verification_synthesis",
+  "type": "object"
+}