diff --git a/.gitignore b/.gitignore index 2fe355b..787b618 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,7 @@ skills-lock.json .idea/ *.iml +# macOS +.DS_Store + .asta diff --git a/plugins/asta-preview/skills/research-step/SKILL.md b/plugins/asta-preview/skills/research-step/SKILL.md index 0d2fcee..e9f9a8c 100644 --- a/plugins/asta-preview/skills/research-step/SKILL.md +++ b/plugins/asta-preview/skills/research-step/SKILL.md @@ -1,12 +1,12 @@ --- name: research-step description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs. -allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*) +allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Bash(asta:*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*) --- # Research Step -Models a research session as a beads epic. Each unit of work is a typed sub-issue whose `metadata.research_step.output` matches a JSON schema in `assets/schemas.yaml`. +Models a research session as a beads epic. A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `hypothesis_driven_research` flow (literature → falsifiable hypotheses → one prespecified test per hypothesis), the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; run it as its own session in a **separate workspace** — own `mission.md` and `.beads` — typically kicked off after a theory-generation run; a second epic root in the same workspace breaks `scripts/epic-root.sh`), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). 
`assets/schemas.yaml` defines the reusable `types` (immutable records — verdicts are `adjudication` records referencing their subject), the `tasks` (pure output contracts mapping each output key to its type), and the `flows` (each step carrying its `mission`, its `input` steps, and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`. This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first. @@ -23,7 +23,7 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec | `mission.md` | Input. The research task. | | `.beads/` | Source of truth for state. | | `summary.md` | Derived view of the session, regenerated by **update-summary**. Beads is the source of truth; this file is just a digest for humans and for **brainstorm**. Frontmatter `beads_snapshot` records the state it was rendered from. | -| `background_knowledge.txt` | Optional. Long-form context referenced from issue metadata via `summary_path`. | +| `.asta/<agent>/<run_id>/` | Heavy artifacts (raw agent JSON, datasets, reports), referenced from `output_json` by repo-root-relative `*_path` fields (e.g. `report_path`, `local_path`). | ## Workflows @@ -51,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working - **init** → always run **plan** afterwards (which then chains to **update-summary**). - **plan** → always run **update-summary** afterwards so the digest reflects the new graph. -- **execute** → if the closed task type is `literature_review`, `hypothesis`, `analysis`, or `synthesis`, chain to **plan** (which chains to **update-summary**); otherwise chain directly to **update-summary**. 
+- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off rule in `execute.md`, last step); otherwise chain directly to **update-summary**. - **update-summary** and **brainstorm** → never chain. ## Boundaries diff --git a/plugins/asta-preview/skills/research-step/assets/schemas.yaml b/plugins/asta-preview/skills/research-step/assets/schemas.yaml index b840628..b5ead12 100644 --- a/plugins/asta-preview/skills/research-step/assets/schemas.yaml +++ b/plugins/asta-preview/skills/research-step/assets/schemas.yaml @@ -1,80 +1,638 @@ -# Output schemas for research-step task types. -# Each task issue stores its realized output at metadata.research_step.output, -# matching the shape under `output:` for its task_type. - -schema_version: 1 - -task_types: - - scope: - inputs: [] - output: - question: string # the precise research question - boundaries: [string] # what is in / out of scope - success_criteria: [string] # how we know we have answered it - - definitions: - inputs: [scope] - output: - terms: - - name: string - operational_definition: string - rationale: string - - literature_review: - inputs: [scope, definitions] - output: - summary_path: string # relative path; long-form context - key_findings: [string] # 3-10 bullets readable without opening summary_path - gaps: [string] # gaps that motivate hypotheses - citations: - - id: string - title: string - url: string - relevance: string - - hypothesis: - inputs: [scope, literature_review] - output: - statement: string # H_n: ... 
- rationale: string - falsifiable_prediction: string - expected_evidence: [string] - - experiment_design: - inputs: [hypothesis] - output: - method: string - procedure: [string] # ordered steps - variables: - independent: [string] - dependent: [string] - controls: [string] - artifacts_expected: [string] # paths the gathering step will produce - - evidence_gathering: - inputs: [experiment_design] - output: - artifacts: - - path: string - kind: string # data | log | figure | code | other - description: string - log_path: string # what was actually run - deviations: [string] # ways execution diverged from design - - analysis: - inputs: [hypothesis, evidence_gathering] - output: - verdict: enum [supported, refuted, inconclusive] - confidence: number # 0.0 - 1.0 - reasoning: string - caveats: [string] - - synthesis: - inputs: [scope, analysis_*] # all analysis issues in the epic - output: - answer: string # answer to scope.question - supporting_hypotheses: [bd_id] - refuted_hypotheses: [bd_id] - open_questions: [string] # become discovered-from edges on re-plan - report_path: string # generated markdown report +version: 2 + +config: + # Session-tunable knobs and their defaults. A mission.md may override any of + # them in a `## Config` section (one `key: value` line each). plan's bootstrap + # resolves defaults + mission overrides and pins the result on the epic root + # (metadata.research_step.config); execute reads the pinned values from the + # epic root and passes them into the chain commands. Names match the field the + # consuming agent actually takes. 
+ n_experiments: 10 # auto-ds: experiments per discovery run; set in the run-metadata + # JSON given to `asta autodiscovery metadata` (data_driven_discovery + # fresh runs, cohort_assembly/discovery_run) + max_papers_to_retrieve: 30 # generate-theories find-and-extract: papers to extract from + # (provenance_extraction, evidence_extraction, hypothesis_formation) + max_parallel_dv_runs: 5 # cap on concurrent DataVoyager (analyze-data) submissions when a + # step fans out runs in parallel (holdout_replication, analysis + # batches); submit up to this many, then wait before submitting more + +enums: + outcome: [held, partial, failed, underpowered, n/a] # the one verdict vocabulary, for laws, theories, and hypotheses + testability: [tested, proxy_only, untestable] + construct_equivalence: [equivalent, proxy, mismatch] + feasibility: [feasible, proxy_only, data_unavailable, construct_mismatch] + independence_axis: [region, instrument, method, construct, temporal, population] + generation_objective: [accuracy_focused, novelty_focused] + subject_kind: [empirical_law, theory, hypothesis] + novelty: [established, derivable, genuinely_new] + support_level: [supports, mixed, contradicts, inconclusive] + priority: [high, medium, low] + access_status: [acquired, open_unfetched, restricted, not_found] + +types: + + # Records are immutable: a task emits a record once; later stages never re-emit + # it with new values. Verdicts, enrichments, and acquisition results are their + # own records referencing the original by id (adjudication -> subject_id, + # source_access/acquisition -> data_source_id). + # + # Agent outputs nest VERBATIM: when a type carries another agent's record + # (theory.components, experiment rows, mcts_provenance), the agent's object is + # stored unmodified under its key - orchestrator annotations wrap it, never + # reach into or rename inside it - so a real agent payload always slots in. 
+ # validate-output.sh deep-validates against the compiled JSON Schemas + # (assets/compiled/, regenerated by scripts/compile-schemas.py at build time): + # top-level output keys are closed, but nested objects stay open, so extra + # nested fields from real payloads are always permitted. A field name ending + # in `?` (e.g. mcts_provenance?) is optional; unmarked fields are required. + + # --- Artifacts. The `artifacts` key on every task holds A2A 1.0 Artifacts, + # exactly as the spec defines them: an artifact carries an array of typed `parts` + # (wire field names, camelCase). A2A artifacts returned by chain commands are + # stored as received; locally produced byproducts (a rendered figure, a script, + # a data file) are wrapped in the same shape as file parts. Conventions on top + # of the spec: + # - agents tag the artifact kind in metadata.type, e.g. extraction-schema | + # extraction | theory | novelty | theory_store (theorizer) · + # paper-finder-search-result · widget_data_voyager (DV); local byproducts + # use figure | code | data | log | experiment-design. + # - local files are file parts in the *uri* form, uri = repo-root-relative + # path under .asta/<agent>/<run_id>/, with a mimeType (image/png, + # text/x-python, text/csv, text/markdown, ...). + # - never put the *bytes* form in output_json - beads caps metadata at ~64KB; + # base64 payloads from agents (e.g. DV figures) are written to disk first + # and referenced by uri. + # Byproducts always travel this channel; a thing the contract *requires* + # (e.g. an analysis's figures) is a typed output key. + + artifact: # A2A 1.0 Artifact, verbatim + artifactId: string # unique within the task (e.g. 
UUID, or - for local byproducts) + name: string + description: string + parts: [part] + metadata?: object # optional; metadata.type carries the artifact kind + extensions?: [string] # optional; URIs of relevant A2A extensions + + part: # A2A Part union, discriminated by `kind` + kind: string # text | file | data + metadata?: object # optional, per part + # text: {kind: text, text: string} + # file: {kind: file, file: {uri: string, mimeType: string, name: string}} - the only file form allowed in output_json + # {kind: file, file: {bytes: base64, mimeType: string, name: string}} - wire/disk only, never in output_json + # data: {kind: data, data: object} - structured payloads, stored as received + + figure: # the report-embedding form: image is a repo-root-relative path + caption: string # (PNG/SVG), embedded via ![caption](path) + image: string + + experiment: # an auto-ds experiments.json record; these four fields are the + experiment_id: string # required projection - paste the full record in unchanged (extras + status: string # like experiment_plan, code, review, prior/posterior beliefs are + hypothesis: string # permitted and preserved) + analysis: string + + empirical_law: # identity of a discovered law; its verdict lives in the + id: string # adjudication that references it, never here + statement: string + construct: string + source_operationalization: string + source_node: string + effect_size_source: string # the effect size as the source run/paper claims it + grouping_rationale: string + mcts_provenance?: {surprise: number, is_surprising: boolean, prior_belief: object, posterior_belief: object} # optional; the auto-ds experiment record's search-signal fields, verbatim + + dataset: + id: string + definition: string + source: string + n: number + sampling: string + variables: [string] + covers_laws: [string] + + data_source: # the paper behind a run dataset; emitted once by provenance_search + id: string + dataset_id: string # which run dataset this sources 
(e.g. ds_alaska_elas) + paper_id: string # source paper (Semantic Scholar sha / corpus id) + paper_title: string + paper_url: string + + source_access: # provenance_extraction's enrichment, keyed by data_source id + data_source_id: string + data_availability: string # the paper's data-availability statement, verbatim or summarized + repository: string # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA + identifier: string # DOI / accession / direct URL for the data + + acquisition: # data_acquisition's result, keyed by data_source id + data_source_id: string + access_status: access_status # acquired | open_unfetched | restricted | not_found + local_path: string # repo-root-relative path once acquired (else empty) + dataset_id: string # the dataset registered from this source (empty if not acquired) + validation_note: string # QC against the paper - n, schema/variables, units, missingness - or why not validated + + cohort: # the data a fresh auto-ds discovery runs against (auto_discovery flow) + id: string + research_question: string # the intent the discovery runs against (from mission.md) + inclusion_criteria: string + exclusion_criteria: string + sampling: string + source_data_sources: [string] # data_source ids the cohort was assembled from + discovery_subset: {definition: string, n: number, path: string} # what discovery sees + holdout_subset: {definition: string, n: number, path: string} # independent, held back for replication + run_id: string # the stood-up auto-ds run (autodiscovery create) + + experiment_design: # one test, committed before its analysis runs; used by the + subject_kind: subject_kind # replication (law) and testing (hypothesis) branches + subject_id: string # the law / theory / hypothesis under test + experiment_name: string + plain_language_description: string + source_operationalization: string # how the source measured it (empty for a novel hypothesis) + independent_operationalization: string + construct_equivalence: construct_equivalence + 
feasibility: feasibility + required_data: string + data_gap: string + experiment_design_query: string # the natural-language query sent to the experiment designer (input provenance; empty when no designer ran) + prespecified: # the commitment adjudicate checks the result against + test: string # the statistical test / model + metric: string # the quantity that decides it + success_threshold: string # what counts as held, incl. direction; note expected power / min detectable effect if known + + analysis: # DataVoyager's TaskSummary, verbatim (figures are hoisted to the + final_answer: string # task's `figures` output key after imageb64 -> PNG conversion) + assumptions: string # a single text block, as the agent emits it + code: string + + audit_report: + subject_id: string # the law / theory / hypothesis whose analysis was audited + challenges: [{concern: string, check: string, outcome: string}] # include one negative-control check (e.g. shuffled predictor) + artifacts_found: [string] + verdict_survives: boolean + recommended_adjustment: string + + adjudication: # the verdict record; references its subject, never mutates it + subject_kind: subject_kind + subject_id: string + outcome: outcome # held | partial | failed | underpowered | n/a + testability: testability + effect_size_observed: string + prespecified_check: string # the observed metric vs the committed success_threshold + independence_axes: [independence_axis] + data_used: string + evidence: string + + extracted_data: + id: string + run_id: string + paper_id: string + extraction_schema_id: string + rows: + - name_short: string + name_full: string + brief_description: string + citation_title: string + uuid: string + + literature_review: # hypothesis_driven_research's survey output + summary: string + key_findings: [{text: string, uuids: [string]}] + open_gaps: [string] # gaps that motivate hypotheses + citations: [{id: string, corpus_id: number, title: string, url: string, relevance: string}] # corpus_id = 
canonical S2 corpusId; rows convert mechanically to PaperEntry seeds + + hypothesis: # a slim, directly testable claim (hypothesis_driven_research) + id: string + statement: string + rationale: string # why the literature implies it + falsifiable_prediction: string + grounds: [{text: string, uuids: [string]}] # the evidence the rationale rests on + + theory: + id: string + name: string + description: string + theory_query: string + objective: generation_objective # orchestrator annotation (the generation branch); the agent's own copy is components.generation_objective + grounds_law_ids: [string] # orchestrator annotation - which laws ground this theory (no agent equivalent) + supporting_evidence_ids: [string] # orchestrator annotation + components: # the theorizer's theory record, carried VERBATIM - never flatten or edit + generation_objective: string # the agent's value as emitted (e.g. accuracy-focused) + theory_statements: + - statement_name: string + theory_statement: string + supporting_evidence: [{text: string, uuids: [string]}] + conflicting_evidence: [{text: string, uuids: [string]}] + new_predictions_likely: [string] + new_predictions_unknown: [string] + unaccounted_for: [{text: string, uuids: [string]}] + + testability_triage: + assessments: + - theory_id: string + testable_now: boolean + available_data: string + required_data: string + proposed_test: {test: string, metric: string, success_threshold: string} # prespecified; the verification branch's adjudicate checks against it + gap: string + testable_theory_ids: [string] + + theory_evaluation: + id: string + theory_id: string + novelty: novelty # rollup across statement_evaluations - the most novel statement wins + overall_support: support_level + overall_support_raw?: string # the agent's untyped judgment, verbatim (optional) + explanation: string + statement_evaluations: # the agent's real granularity - novelty is scored per statement + - statement_index: number + novelty: novelty + explanation: 
string + + next_run_proposal: + kind: string # any flows: or tasks: key in this file + title: string + tests: [string] + data_needed: string + expected_signature: string + priority: priority + + # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report, + # theory_report, verification_report, hypothesis_report, discovery_report), one + # standalone data-gaps report, and a theory-led master (research_report). Each + # carries report_path (the .md deliverable written first), a title, a one-line + # headline, a typed body, and `links` back to the artifacts, tasks, and papers it + # rests on. Each sub-flow report exposes a local `gaps` list that gap_synthesis + # aggregates into the data_gaps_report. + + provenance_report: + report_path: string + title: string + headline: string + sources: + - dataset_id: string + paper_title: string + paper_url: string + repository: string + access_status: access_status + local_path: string + method_note: string # how sources were matched and the data merged/validated (e.g. 
join key, resulting n vs the run's n) + acquired: [string] + not_acquired: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + reproduction_report: + report_path: string + title: string + headline: string + method_note: string + laws_ledger: + - law_id: string + statement: string + outcome: outcome + testability: testability + effect_size_source: string + effect_size_observed: string + independence_axes: [independence_axis] + evidence: string + what_held: [string] + what_failed_or_untestable: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + theory_report: + report_path: string + title: string + headline: string + mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]} + theories: + - theory_id: string + name: string + objective: generation_objective + one_line: string + grounds_law_ids: [string] + novelty: novelty + testable_now: boolean + supporting_evidence_ids: [string] + novelty_summary: string + new_predictions: [string] + open_threads: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + verification_report: + report_path: string + title: string + headline: string + novelty_by_verification: + - theory_id: string + claim: string + novelty: novelty + outcome: outcome + effect_size: string + data_used: string + audit_survived: boolean + what_was_tested: string + what_could_not_be_tested: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + hypothesis_report: # synthesis output of the hypothesis_driven_research flow + report_path: string + title: string + headline: string + question: string # 
the research question from mission.md + ledger: + - hypothesis_id: string + statement: string + outcome: outcome + effect_size_observed: string + evidence: string + answer: string # what the verdicts say about the question + open_questions: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + data_gaps_report: + report_path: string + title: string + headline: string + gaps: + - item: string + missing_data: string + blocks: string + severity: priority + arose_in: string + next_steps: [next_run_proposal] + figures: [figure] + links: [{label: string, ref: string}] + + research_report: + report_path: string + title: string + headline: string + mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]} + theory_highlights: + - theory_id: string + claim: string + novelty: novelty + outcome: outcome + inference_chain: [{claim: string, chain: [string]}] + what_was_done: [string] + sub_reports: [{kind: string, report_path: string, one_line: string}] + tensions_and_surprises: [{observation: string, where: string, evidence: string}] + figures: [figure] # at least the one decisive figure, embedded in the report + links: [{label: string, ref: string}] + + discovery_report: # synthesis output of the auto_discovery flow + report_path: string + title: string + headline: string + run_id: string # the discovery run, with its cohort sizes in the report header + laws: + - law_id: string + statement: string + surprise: number # the discovery run's surprise signal for this candidate law + outcome: outcome # from the held-out replication (untested branches are n/a) + deciding_experiment: string # the held-out DataVoyager run/analysis that decided the verdict + effect_size_discovery: string # on the discovery subset + effect_size_holdout: string # on the held-out subset - the pair shows replication shrinkage + interpretation: 
string # what the run means against the question that motivated it + next_steps: [next_run_proposal] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + +# Tasks are pure output contracts: output maps each output_json key to its type, +# [type] meaning a JSON array of that type. Every task also carries artifacts. +# A task's inputs are declared per flow step (the same output shape takes +# different inputs in different flows), under `input:` in the flows below. + +tasks: + provenance_search: {output: {data_sources: [data_source], artifacts: [artifact]}} + provenance_extraction: {output: {extracted_data: extracted_data, source_access: [source_access], artifacts: [artifact]}} + data_acquisition: {output: {acquisitions: [acquisition], datasets: [dataset], artifacts: [artifact]}} + provenance_synthesis: {output: {provenance_report: provenance_report, artifacts: [artifact]}} + data_driven_discovery: {output: {experiments: [experiment], datasets: [dataset], artifacts: [artifact]}} + law_extraction: {output: {empirical_laws: [empirical_law], artifacts: [artifact]}} + evidence_gathering: {output: {datasets: [dataset], artifacts: [artifact]}} + experiment_design: {output: {experiment_design: experiment_design, artifacts: [artifact]}} + analysis: {output: {analysis: analysis, figures: [figure], artifacts: [artifact]}} + audit: {output: {audit_report: audit_report, artifacts: [artifact]}} + adjudicate: {output: {adjudication: adjudication, artifacts: [artifact]}} + reproduction_synthesis: {output: {reproduction_report: reproduction_report, artifacts: [artifact]}} + evidence_extraction: {output: {extracted_data: extracted_data, artifacts: [artifact]}} + theory_formation: {output: {theories: [theory], artifacts: [artifact]}} + testability_triage: {output: {testability_triage: testability_triage, artifacts: [artifact]}} + novelty_assessment: {output: {theory_evaluations: 
[theory_evaluation], artifacts: [artifact]}} + theory_synthesis: {output: {theory_report: theory_report, artifacts: [artifact]}} + verification_synthesis: {output: {verification_report: verification_report, artifacts: [artifact]}} + gap_synthesis: {output: {data_gaps_report: data_gaps_report, artifacts: [artifact]}} + final_synthesis: {output: {research_report: research_report, artifacts: [artifact]}} + # hypothesis_driven_research flow + literature_review: {output: {literature_review: literature_review, artifacts: [artifact]}} + hypothesis_formation: {output: {hypotheses: [hypothesis], artifacts: [artifact]}} + hypothesis_synthesis: {output: {hypothesis_report: hypothesis_report, artifacts: [artifact]}} + # auto_discovery flow (its own session in a separate workspace: source a cohort, run a fresh discovery, replicate on held-out data) + cohort_assembly: {output: {cohort: cohort, datasets: [dataset], artifacts: [artifact]}} + discovery_run: {output: {experiments: [experiment], empirical_laws: [empirical_law], artifacts: [artifact]}} + holdout_replication: {output: {adjudication: adjudication, figures: [figure], artifacts: [artifact]}} + discovery_synthesis: {output: {discovery_report: discovery_report, artifacts: [artifact]}} + +# Each flow step carries: mission (what the work is), input (the upstream steps +# in this session whose issues plan wires as the task's inputs), and chain (the +# asta commands). A node with a chain is a step; a node with only child nodes +# and a mission is a group; a chain item {workflow: <flow>, mission: <text>} +# expands the named sub-flow inline. A group whose branches are created at +# replan (one per law / theory / hypothesis, once the naming step closes) +# declares `replan: true`. 
+ +flows: + + data_and_literature_grounded_theory_generation: + mission: Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report. + data_provenance: + mission: Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand. + chain: + - {workflow: data_provenance, mission: Source the papers and datasets the run named in the mission was built on; acquire the open data and record what is restricted.} + reproduction: + mission: Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data. + chain: + - {workflow: reproduction, mission: Import the run named in the mission; reproduce each law on independent data with construct-equivalence and a feasibility gate.} + theorizer: + mission: Generate literature- and data-grounded theories of the reproduced laws and score their novelty. + chain: + - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.} + verification: + mission: One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan. + replan: true + analysis: + mission: Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. 
Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<run_id>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [testability_triage, data_driven_discovery, evidence_gathering] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. + input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id. + input: [testability_triage, analysis, audit] + chain: [] + verification_synthesis: + mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`. + input: [verification, novelty_assessment] + chain: [] + gap_synthesis: + mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it. 
+ input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis] + chain: [] + final_synthesis: + mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them. + input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis, gap_synthesis] + chain: [] + + data_provenance: + mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed. + provenance_search: + mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url). 
+ input: [] + chain: [asta literature find, asta papers search] + provenance_extraction: + mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable. + input: [provenance_search] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + data_acquisition: + mission: For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work. + input: [provenance_search, provenance_extraction] + chain: [asta documents, asta autodiscovery upload] + provenance_synthesis: + mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note. 
+ input: [provenance_search, provenance_extraction, data_acquisition] + chain: [] + + reproduction: + mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run. + data_driven_discovery: + mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted. + input: [] + chain: [asta autodiscovery run, asta autodiscovery experiments] + law_extraction: + mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication. + input: [data_driven_discovery] + chain: [] + evidence_gathering: + mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test. 
+ input: [law_extraction] + chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload] + replication: + mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data. + replan: true + experiment_design: + mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap. + input: [law_extraction, evidence_gathering] + chain: [asta experiment] + analysis: + mission: Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data//figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [experiment_design, evidence_gathering] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. 
+ input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted. + input: [experiment_design, analysis, audit] + chain: [] + reproduction_synthesis: + mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate. + input: [law_extraction, replication] + chain: [] + + theorizer: + mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test. + evidence_extraction: + mission: Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. 
Seek disconfirming evidence too, and tag each finding with the law it bears on. + input: [law_extraction, adjudicate] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + theory_generation: + mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws. + theory_formation: + mission: Form theories from the shared extraction store under this branch's objective. + input: [evidence_extraction] + chain: [asta generate-theories form-theory] + testability_triage: + mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps. + input: [theory_generation, data_driven_discovery, evidence_gathering] + chain: [] + novelty_assessment: + mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories. + input: [testability_triage] + chain: [asta generate-theories evaluate-novelty] + theory_synthesis: + mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`. 
+ input: [theory_generation, novelty_assessment, testability_triage] + chain: [] + + hypothesis_driven_research: + mission: Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize. + literature_review: + mission: Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations. + input: [] + chain: [asta literature find, asta papers search] + hypothesis_formation: + mission: Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds. + input: [literature_review] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + testing: + mission: One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end. + replan: true + experiment_design: + mission: Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. 
What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap. + input: [hypothesis_formation, literature_review] + chain: [asta experiment] + data_acquisition: + mission: Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input. + input: [experiment_design] + chain: [asta documents, asta autodiscovery upload] + analysis: + mission: Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data//figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [experiment_design, data_acquisition] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. + input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id. + input: [experiment_design, analysis, audit] + chain: [] + hypothesis_synthesis: + mission: Fan the branches in. 
Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses. + input: [hypothesis_formation, testing] + chain: [] + + auto_discovery: + mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one. + cohort_assembly: + mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets. + input: [] + chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata] + discovery_run: + mission: Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). 
Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws. + input: [cohort_assembly] + chain: [asta autodiscovery submit, asta autodiscovery experiments] + replication: + mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset. + replan: true + holdout_replication: + mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data//figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [discovery_run, cohort_assembly] + chain: [asta analyze-data submit, asta analyze-data poll] + discovery_synthesis: + mission: Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap. 
+ input: [discovery_run, replication] + chain: [] diff --git a/plugins/asta-preview/skills/research-step/scripts/close-task.sh b/plugins/asta-preview/skills/research-step/scripts/close-task.sh new file mode 100755 index 0000000..7535a38 --- /dev/null +++ b/plugins/asta-preview/skills/research-step/scripts/close-task.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# close-task.sh +# Publish a task's output and finish it: write output_json + output_markdown into the issue +# metadata, validate output_json against the schema, close the issue, assert it closed, then +# close any ancestor group whose last child just closed. +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -eq 3 ]] || { echo "usage: close-task.sh " >&2; exit 1; } +id="$1"; oj="$2"; om="$3" +[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; } +[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; } +jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; } + +# 1. publish: merge output_json + output_markdown into the existing research_step metadata +cur="$(bd show "$id" --json | jq -c '.[0].metadata')" +merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \ + '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$merged" > "$tmp" +bd update "$id" --metadata @"$tmp" >/dev/null + +# 2. validate structurally (reads the issue back; no style lint) +bash "$here/validate-output.sh" "$id" + +# 3. close and 4. assert closure +bd close "$id" >/dev/null +[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \ + || { echo "close-task: $id did not close" >&2; exit 2; } +echo "closed $id" + +# 5. cascade: close each ancestor group whose direct children are all closed. +# The epic root is never closed here — "root open, no open tasks" is the +# session-complete state that epic-root.sh and the workflows rely on. 
+cur_id="$id" +while [[ "$cur_id" == *.* ]]; do + parent="${cur_id%.*}" + parent_json="$(bd show "$parent" --json 2>/dev/null)" || break + [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break + open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" ' + [ .[] + | select(.id | startswith($p + ".")) + | select((.id[($p|length)+1:] | contains(".")) | not) + | select(.status != "closed") ] | length')" + [[ "$open_kids" -eq 0 ]] || break + if bd close "$parent" >/dev/null 2>&1; then + echo "closed group $parent" + else + echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2 + break + fi + cur_id="$parent" +done diff --git a/plugins/asta-preview/skills/research-step/scripts/create-task.sh b/plugins/asta-preview/skills/research-step/scripts/create-task.sh new file mode 100755 index 0000000..1e992a9 --- /dev/null +++ b/plugins/asta-preview/skills/research-step/scripts/create-task.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# create-task.sh <brief-description> [input-id ...] +# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description, +# and initialized research_step metadata. output_json / output_markdown stay null until +# execute publishes them via close-task.sh. Prints the new issue id. +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; } +parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5 + +# Validate the task_type against schemas.yaml. The helper exits 3 for an +# unknown task_type (and prints the known ones) or 5 when the schema cannot +# be read (e.g. PyYAML missing — run init); set -e propagates either. 
+"$here/task-output-keys.sh" "$task_type" >/dev/null + +[[ -n "$desc" ]] || { echo "create-task: a brief description is required" >&2; exit 4; } +[[ "$desc" != *$'\n'* ]] || { echo "create-task: description must be one line" >&2; exit 4; } +[[ "${#desc}" -le 200 ]] || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; } + +if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi +meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \ + '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$meta" > "$tmp" +bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent diff --git a/plugins/asta-preview/skills/research-step/scripts/epic-root.sh b/plugins/asta-preview/skills/research-step/scripts/epic-root.sh index 13a7dfd..c176ef0 100755 --- a/plugins/asta-preview/skills/research-step/scripts/epic-root.sh +++ b/plugins/asta-preview/skills/research-step/scripts/epic-root.sh @@ -33,7 +33,7 @@ if ! command -v jq >/dev/null 2>&1; then exit 3 fi -ids=$(bd list --json | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id') +ids=$(bd list --json --limit 0 | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id') count=$(printf '%s' "$ids" | grep -c . || true) case "$count" in diff --git a/plugins/asta-preview/skills/research-step/scripts/next-task.sh b/plugins/asta-preview/skills/research-step/scripts/next-task.sh new file mode 100755 index 0000000..97e3592 --- /dev/null +++ b/plugins/asta-preview/skills/research-step/scripts/next-task.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# next-task.sh — the single definition of task ordering. 
Prints the open task +# issues (status == open, metadata.research_step.task_type set), sorted +# *numerically* by hierarchical id (wf.1.2 before wf.1.10 — a plain lexical +# sort would get this wrong past 9 siblings). Groups (no task_type) are never +# listed; there are no dependency edges, so this order is the ordering signal. +# +# Used by execute (pick the next task) and update-summary (render the queue), +# so the two never disagree about what runs next. +# +# Output (stdout, key: value lines): +# next: <bd-id> | none +# queue: <space-separated bd-ids> (omitted when empty) +# Exit: 0 (even when next: none) · 3 bd/jq missing +set -euo pipefail + +command -v bd >/dev/null 2>&1 || { echo "next-task: 'bd' not found on PATH" >&2; exit 3; } +command -v jq >/dev/null 2>&1 || { echo "next-task: 'jq' not found on PATH" >&2; exit 3; } + +ids="$(bd list --json --limit 0 | jq -r ' + [ .[] + | select(.status == "open") + | select(.metadata.research_step.task_type != null) ] + | sort_by(.id | split(".") | map(tonumber? // .)) + | .[].id')" + +if [[ -z "$ids" ]]; then + echo "next: none" + exit 0 +fi + +echo "next: $(head -n1 <<<"$ids")" +rest="$(tail -n +2 <<<"$ids" | tr '\n' ' ' | sed 's/ $//')" +[[ -n "$rest" ]] && echo "queue: $rest" || true diff --git a/plugins/asta-preview/skills/research-step/scripts/summary-check.sh b/plugins/asta-preview/skills/research-step/scripts/summary-check.sh index 8d98b65..6a14470 100755 --- a/plugins/asta-preview/skills/research-step/scripts/summary-check.sh +++ b/plugins/asta-preview/skills/research-step/scripts/summary-check.sh @@ -30,7 +30,7 @@ if ! 
command -v jq >/dev/null 2>&1; then exit 3 fi -current=$(bd list --json \ +current=$(bd list --json --limit 0 \ | jq -r '.[] | select(.status != "closed") | .id' \ | sort \ | shasum -a 256 \
diff --git a/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh b/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh
new file mode 100755
index 0000000..ef1269b
--- /dev/null
+++ b/plugins/asta-preview/skills/research-step/scripts/task-output-keys.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# task-output-keys.sh <task_type> — print the space-separated output keys for a
+# task from assets/schemas.yaml. The single schema reader for scripts:
+# create-task.sh and validate-output.sh call it (discarding the keys) to
+# confirm a task_type is declared.
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema
+# (python3/PyYAML missing, or schemas.yaml unreadable or malformed — run init)
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; }
+
+# A missing interpreter would otherwise exit 127, not the documented 5.
+command -v python3 >/dev/null 2>&1 \
+  || { echo "task-output-keys: 'python3' not found on PATH - run the init workflow" >&2; exit 5; }
+
+python3 - "$schemas" "$1" <<'PY'
+import sys
+
+try:
+    import yaml
+except ImportError:
+    print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+try:
+    with open(sys.argv[1], encoding="utf-8") as f:
+        d = yaml.safe_load(f)
+except Exception as e:
+    print(f"task-output-keys: cannot read {sys.argv[1]}: {e}", file=sys.stderr)
+    sys.exit(5)
+
+# An empty or non-mapping schema must exit 5, not crash with a traceback
+# (exit 1, which callers would read as a usage error).
+tasks = d.get("tasks") if isinstance(d, dict) else None
+if not isinstance(tasks, dict):
+    print(f"task-output-keys: {sys.argv[1]} has no 'tasks' mapping", file=sys.stderr)
+    sys.exit(5)
+
+t = tasks.get(sys.argv[2])
+if t is None:
+    print(f"task-output-keys: unknown task_type '{sys.argv[2]}'", file=sys.stderr)
+    print(f"task-output-keys: known: {' '.join(sorted(tasks))}", file=sys.stderr)
+    sys.exit(3)
+
+# Presumably `output` maps each output key to its type (a plain list of keys
+# also works); iterating either yields the keys. TODO confirm against schemas.yaml.
+output = t.get("output") if isinstance(t, dict) else None
+if not isinstance(output, (dict, list)):
+    print(f"task-output-keys: task '{sys.argv[2]}' declares no output keys", file=sys.stderr)
+    sys.exit(5)
+print(" ".join(str(k) for k in output))
+PY
diff --git a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh index 0f5a84e..69530f9 100755 ---
a/plugins/asta-preview/skills/research-step/scripts/validate-output.sh +++ b/plugins/asta-preview/skills/research-step/scripts/validate-output.sh @@ -1,102 +1,65 @@ #!/usr/bin/env bash -# validate-output.sh — structural validation of a research_step output JSON. -# -# Usage: validate-output.sh <task_type> <metadata-json-file> -# -# Verifies that the JSON file: -# 1. parses -# 2. carries the canonical metadata envelope -# ({research_step: {task_type, inputs, output_schema_version, output}}) -# 3. has every required `output.<key>` for the given <task_type> per -# assets/schemas.yaml (schema_version: 1) -# -# Exit codes: -# 0 — valid -# 2 — JSON parse error -# 3 — unknown task_type -# 4 — missing required field -# 5 — task_type mismatch with envelope -# -# This is structural validation only. Quality validation (sound prediction, -# sane confidence, valid citations) is out of scope per execute.md. +# validate-output.sh <issue-id> — structural check of a task's stored output_json. +# Reads the issue from beads and deep-validates metadata.research_step.output_json +# against the compiled JSON Schema (assets/compiled/<task_type>.schema.json, +# regenerated from schemas.yaml by scripts/compile-schemas.py at build time): +# top-level keys closed, declared nested fields required, extra nested fields +# permitted (payloads nest verbatim). No style or quality linting. 
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata · 3 unknown task +# · 4 schema violation +# · 5 schema unreadable (PyYAML/jsonschema missing or compiled schema +# absent — run the init workflow, or update the plugin) set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -if [[ $# -ne 2 ]]; then - echo "usage: validate-output.sh <task_type> <metadata-json-file>" >&2 - exit 1 -fi +[[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; } +id="$1" -task_type="$1" -file="$2" +rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // empty')" || rs="" # a failed bd/jq lookup must reach the exit-2 message below instead of aborting silently under set -e +[[ -n "$rs" ]] || { echo "validate-output: $id has no metadata.research_step" >&2; exit 2; } +task_type="$(jq -r '.task_type // empty' <<<"$rs")" +[[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; } -if ! jq -e . "$file" > /dev/null 2>&1; then - echo "validate-output: $file is not valid JSON" >&2 - exit 2 -fi +# Exits 3 (unknown task_type) or 5 (schema unreadable) with its own message. +"$here/task-output-keys.sh" "$task_type" >/dev/null -# Required output fields, mirroring assets/schemas.yaml (schema_version: 1).
-case "$task_type" in - scope) required="question boundaries success_criteria" ;; - definitions) required="terms" ;; - literature_review) required="summary_path key_findings gaps citations" ;; - hypothesis) required="statement rationale falsifiable_prediction expected_evidence" ;; - experiment_design) required="method procedure variables artifacts_expected" ;; - evidence_gathering) required="artifacts log_path deviations" ;; - analysis) required="verdict confidence reasoning caveats" ;; - synthesis) required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;; - *) - echo "validate-output: unknown task_type '$task_type'" >&2 - echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis" >&2 - exit 3 - ;; -esac +got="$(jq -c '.output_json // empty' <<<"$rs")" +[[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; } -# Envelope must carry the matching task_type so we don't validate scope JSON -# against an analysis schema by accident. -envelope_type=$(jq -r '.research_step.task_type // empty' "$file") -if [[ -z "$envelope_type" ]]; then - echo "validate-output: $file missing .research_step.task_type" >&2 +schema="$here/../assets/compiled/${task_type}.schema.json" +[[ -r "$schema" ]] || { + echo "validate-output: compiled schema missing for '$task_type' ($schema) — update the plugin (it is regenerated at build time)" >&2 exit 5 -fi -if [[ "$envelope_type" != "$task_type" ]]; then - echo "validate-output: envelope task_type='$envelope_type' but expected '$task_type'" >&2 - exit 5 -fi +} +OUTPUT_JSON="$got" python3 - "$schema" "$task_type" <<'PY' +import json +import os +import sys -# Envelope shape sanity. -for key in inputs output_schema_version output; do - if ! 
jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then - echo "validate-output: $file missing .research_step.$key" >&2 - exit 5 - fi -done +try: + import jsonschema +except ImportError: + print("validate-output: python3 cannot import jsonschema - run the init workflow", file=sys.stderr) + sys.exit(5) -# Required output fields. -for key in $required; do - if ! jq -e ".research_step.output | has(\"$key\")" "$file" >/dev/null; then - echo "validate-output: missing required field 'output.$key' for task_type '$task_type'" >&2 - exit 4 - fi -done +with open(sys.argv[1]) as f: + schema = json.load(f) +data = json.loads(os.environ["OUTPUT_JSON"]) -# Type spot-checks for the high-leverage cases. Not exhaustive — just the -# fields where a wrong type at this layer would silently break update-summary rendering -# or downstream tasks. -case "$task_type" in - literature_review) - jq -e '.research_step.output.key_findings | type == "array"' "$file" >/dev/null \ - || { echo "validate-output: output.key_findings must be an array" >&2; exit 4; } - jq -e '.research_step.output.gaps | type == "array"' "$file" >/dev/null \ - || { echo "validate-output: output.gaps must be an array" >&2; exit 4; } - jq -e '.research_step.output.citations | type == "array"' "$file" >/dev/null \ - || { echo "validate-output: output.citations must be an array" >&2; exit 4; } - ;; - analysis) - jq -e '.research_step.output.verdict | IN("supported", "refuted", "inconclusive")' "$file" >/dev/null \ - || { echo "validate-output: output.verdict must be one of supported|refuted|inconclusive" >&2; exit 4; } - jq -e '.research_step.output.confidence | type == "number" and . >= 0 and . 
<= 1' "$file" >/dev/null \ - || { echo "validate-output: output.confidence must be a number in [0, 1]" >&2; exit 4; } - ;; -esac +validator = jsonschema.Draft202012Validator(schema) +errors = sorted(validator.iter_errors(data), key=lambda e: list(map(str, e.absolute_path))) +if errors: + for e in errors[:5]: + path = ".".join(str(p) for p in e.absolute_path) + where = f"output_json.{path}" if path else "output_json" + hint = "" + if e.validator == "additionalProperties" and not path: + hint = " - byproducts go in artifacts" + print(f"validate-output: {where}: {e.message}{hint}", file=sys.stderr) + if len(errors) > 5: + print(f"validate-output: ... and {len(errors) - 5} more schema violation(s)", file=sys.stderr) + print(f"validate-output: output_json does not satisfy the '{sys.argv[2]}' schema", file=sys.stderr) + sys.exit(4) +PY echo "ok" diff --git a/plugins/asta-preview/skills/research-step/scripts/write-meta.sh b/plugins/asta-preview/skills/research-step/scripts/write-meta.sh deleted file mode 100755 index 6e7d71a..0000000 --- a/plugins/asta-preview/skills/research-step/scripts/write-meta.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# write-meta.sh — materialize a metadata JSON blob to a temp file and print -# its path, suitable for `bd update <id> --metadata @<path>` or -# `bd create ... --metadata=@<path>`. -# -# Reads JSON from stdin (or from $1 if a path is given), validates that it -# parses, and writes it under $TMPDIR with mode 0600. The path is printed on -# stdout so the caller can splice it into a bd command. -# -# Why this exists: `bd update --metadata` accepts either a JSON string or -# `@file.json`. Inlining a JSON string requires `"$(cat /tmp/x.json)"` (a -# non-bd shell op the SKILL.md frontmatter does not permit), and shell quoting -# gets fragile with embedded quotes. Materializing a file once and using -# `@path` keeps everything in `Bash(bd:*)` territory. 
-set -euo pipefail - -tmp=$(mktemp -t research-step-meta.XXXXXX.json) -trap 'rm -f "$tmp"' ERR - -if [[ $# -ge 1 ]]; then - cp "$1" "$tmp" -else - cat > "$tmp" -fi - -if ! jq -e . "$tmp" >/dev/null 2>&1; then - echo "write-meta: input is not valid JSON" >&2 - rm -f "$tmp" - exit 2 -fi - -chmod 0600 "$tmp" -echo "$tmp" diff --git a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md index 884f48f..6a9bbf6 100644 --- a/plugins/asta-preview/skills/research-step/workflows/brainstorm.md +++ b/plugins/asta-preview/skills/research-step/workflows/brainstorm.md @@ -25,27 +25,27 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m Pick the branch that matches; do not run more than one. - **No `mission.md`** → help the user draft one. - Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**. + Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)**: open `assets/schemas.yaml` and enumerate the keys under `flows:` — do **not** offer flows from memory; the file is the only source of the list, and each flow's purpose is in its `mission` field. A custom chain of `tasks:` entries is also an option. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so `plan` can read them. Also surface the session **config knobs** (the `config:` section of `assets/schemas.yaml`, e.g. 
`n_experiments`, `max_papers_to_retrieve`) with their defaults; record any non-default choices in a `## Config` section of `mission.md` (one `key: value` line each) — `plan` pins the resolved config on the epic at bootstrap. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**. - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session. -- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / ready counts plus the single most-relevant ready task) and ask what they want to do next. +- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / open-task counts plus the next task from `scripts/next-task.sh`) and ask what they want to do next. ### 3. Answer questions, preferring `summary.md` -`summary.md` is the synthesized view of the session — mission, scope, definitions, related work, hypotheses, results, open questions, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current. +`summary.md` is the synthesized view of the session — mission, flow(s), results so far (report headlines), gaps, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current. -**Default path: read `summary.md`.** For most questions ("what's the current scope?", "which hypotheses are open?", "what's blocking progress?", "what's the state of H2?"), the answer is already in this file. Read it first; quote or summarize the relevant section. +**Default path: read `summary.md`.** For most questions ("which laws held?", "what theories came out?", "what's blocking progress?", "what's next?"), the answer is already in this file. Read it first; quote or summarize the relevant section. 
**Drop down to beads only when the digest doesn't have the answer.** `summary.md` summarizes; some questions need the raw outputs: | Need | Query | |---|--------------------------------------------------------------------------------------------------------| -| Single issue's full `metadata.research_step.output` | `bd show <id> --json` | -| Full open-issue metadata (rare; usually the digest covers it) | `bd list` | -| Dependency structure | `bd dep tree <epic-id> --direction up`| -| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output.summary_path` referenced from the digest | -| Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number) | +| Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` | +| Full issue metadata (rare; usually the digest covers it) | `bd list --all --limit 0` | +| Task tree | `bd list --json --all --limit 0` — ids encode the parent-child outline | +| Long-form content behind a report | follow `report_path` (or any `_path` field) from the issue's `output_json` | +| Exact verdict / effect size for a law, theory, or hypothesis | `bd show <adjudicate-id> --json` (the adjudication record; the digest reports headlines, not the numbers) | Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`. diff --git a/plugins/asta-preview/skills/research-step/workflows/execute.md b/plugins/asta-preview/skills/research-step/workflows/execute.md index 5fba9ea..b4ba1ef 100644 --- a/plugins/asta-preview/skills/research-step/workflows/execute.md +++ b/plugins/asta-preview/skills/research-step/workflows/execute.md @@ -5,36 +5,39 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr ## Preconditions - An epic root exists (`scripts/epic-root.sh` prints `status: found`). 
-- `bd ready --json` is non-empty, **or** the caller supplied a specific task ID that is currently `open` and unblocked. +- An open issue with a `task_type` exists, **or** the caller supplied a specific `open` task ID. ## Steps -1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). Hypothesis tasks are normally auto-resolved at creation by **plan**, so they should not appear here. If one does, it means the gap text was too thin for plan to fill the output without inventing content — flag this to the user and ask whether to refine the source `literature_review` first. -2. **Claim it.** `bd update <id> --status=in_progress`. -3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`. -4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state. -5. **Do the work.** Produce a JSON object matching the schema. For schema fields ending in `_path`, write the file to disk first and put the relative path in the JSON. -6. **Validate structurally.** Run `scripts/validate-output.sh <task_type> <metadata-json-file>`. It checks the envelope (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks for the high-leverage cases (e.g., `analysis.verdict` enum, `analysis.confidence` range). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close. -7. 
**Persist the output.** Materialize the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`. -8. **Close.** `bd close <id>`. -9. **Hand off to plan or update-summary.** Some closed task types unlock new graph structure; others don't. Decide based on the closed task's `task_type`: +1. **Pick a task.** If a task ID was supplied, use it. Else run `scripts/next-task.sh` and take the `next:` id — it is the single definition of ordering (open issues with a `task_type`, numerically sorted by hierarchical id; `update-summary` renders the same order). `next: none` ⇒ report that and stop. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal. +2. **Check readiness.** For every issue id in this task's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), verify it is `closed` with a non-null `output_json`. If any input is not ready, **stop and report it** — the graph was built out of order (a task left `in_progress`, or a replan misordering); do not improvise the missing input. This is the readiness check that dependency edges used to provide. +3. **Claim it.** `bd update <id> --status=in_progress`. +4. **Load the schema and config.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>.output` (a mapping of key → type; `[type]` means a JSON array of that type); find the step inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.experiment_design`) — and use its `mission`, `input`, and `chain`. 
Read the **session config** pinned on the epic root (`bd show <epic-id> --json | jq '.[0].metadata.research_step.config'`) and pass its values into the chain where they apply — `n_experiments` into the run-metadata JSON for `asta autodiscovery metadata`, `max_papers_to_retrieve` on `asta generate-theories find-and-extract`. Do not re-read defaults from schemas.yaml mid-session; the pin is the truth. (Some sessions were bootstrapped before config pinning existed; for those, an absent pin means: use the schemas.yaml defaults.) +5. **Gather inputs.** For every issue listed in this issue's `inputs`, read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state. +6. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things: + - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`), and nothing else. Fill every typed field the schema declares (including typed verdicts like `adjudication.outcome` or `audit_report.verdict_survives`); only values with **no typed field** (an execution id, intermediate file paths, raw tool output) go in `artifacts`. Artifact rows are **A2A 1.0 Artifacts** — `{artifactId, name, description, parts, metadata}`, where `parts` is an array of text / file / data parts (see `artifact` and `part` in the schema). Artifacts returned by chain commands are stored as received (their kind in `metadata.type`); locally produced byproducts (a figure, a script, a data file) are wrapped as file parts in the uri form — repo-root-relative path plus mimeType — never the bytes form (beads' ~64KB cap).
Records are immutable — emit verdicts and enrichments as their own records referencing the original by id (`adjudication.subject_id`, `source_access.data_source_id`); never re-emit an upstream record with changed values. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON. + - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key, following the **Report conventions** below (entity hyperlinks, tables, figures). This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files. +7. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal the keys of `tasks.<task_type>.output` — which always include `artifacts` — none null; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed (it never closes the epic root — the session-complete state is root open with no open tasks). A non-zero exit **before** the `closed <id>` line means the issue is still `in_progress` — fix and re-run. A warning **after** `closed <id>` means the task closed but a group could not be auto-closed; close that group manually. The `description` is untouched; it stays the brief one-liner set at creation. +8. 
**Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly. - | Closed task_type | Hand off to | - |---|---| - | `literature_review`, `hypothesis`, `analysis`, `synthesis` | **plan** (with this issue as the source). `plan` then chains to **update-summary**. Note: `hypothesis` only reaches this branch in the rare case it was left open at creation; the normal path is plan→auto-resolve. | - | `scope`, `definitions`, `experiment_design`, `evidence_gathering` | **update-summary** directly. | +## Report conventions - Either path ends with `summary.md` rebuilt. +These apply to every `output_markdown` and to every `*_synthesis` report deliverable. Rigorous but not over the top: a report stays roughly 50–100 lines; the detail behind it lives in artifacts it links to. -## Notes on output files +- **Every named entity is a hyperlink.** Papers → DOI or canonical Semantic Scholar URL; datasets and result files → relative path; runs/experiments → their artifact or metadata file; laws/theories/hypotheses → their ledger row, written with an anchor (`<a id="l1"></a>`) so other reports can deep-link (`reproduction_report.md#l1`). A named thing with no link is a defect. +- **Tables are the spine.** Any ledger, matrix, or catalog (laws × outcomes, theories × verdicts, sources × access) is a table with one row per record, mirroring the typed rows in `output_json`. +- **Figures carry the quantitative claims.** Embed each one (`![caption](path)`) where the claim is made and list it in the `figures` output field. Analysis-type tasks must emit at least one figure; synthesis reports embed the figures their headline rests on (effect-size comparisons, verdict panels, discovery-vs-holdout shrinkage). +- Neutral, third-person register; numbers in the text match the tables they summarize. 
-Schema fields ending in `_path` are relative paths. Conventions: +## Notes on output -- `summary_path` (from `literature_review`) → `background_knowledge.txt` by convention, but any path works. -- `log_path` (from `evidence_gathering`) → typically under `logs/`. -- `report_path` (from `synthesis`) → typically `report.md`. +The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`). `output_json.artifacts` holds A2A Artifacts whose file parts reference those paths by uri; heavy payloads (base64 bytes, raw agent JSON) stay on disk, never inline. -Write the file before setting the output JSON. If the executor crashes between writing the file and closing the issue, the file is harmless orphan data — re-running `execute` on the same issue will overwrite it. +Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`: + +- `report_path` (from every `*_synthesis` report) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `hypothesis_report.md`, `data_gaps_report.md`). + +If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it. 
## Out of scope for this workflow diff --git a/plugins/asta-preview/skills/research-step/workflows/init.md b/plugins/asta-preview/skills/research-step/workflows/init.md index fd11be3..408c60f 100644 --- a/plugins/asta-preview/skills/research-step/workflows/init.md +++ b/plugins/asta-preview/skills/research-step/workflows/init.md @@ -1,6 +1,6 @@ # Workflow: init -Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready. +Bootstrap the environment for a research session: install `bd`, `jq`, PyYAML, and jsonschema, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready. After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier. @@ -32,12 +32,16 @@ Server mode (`bd init --server`) is out of scope: it requires running a Dolt sql - If no Dolt refs exist on the remote, surface the situation to the user with three options: (a) `bd import .beads/issues.jsonl` (fast, but discards Dolt history and any state newer than the export), (b) configure a Dolt remote and `bd dolt push` from another machine that has the live DB, then retry, (c) abort. - Pick one path only after explicit user confirmation. Never auto-import. -4. **Verify the staleness check works.** +4. 
**Ensure `python3` can import `yaml` (PyYAML) and `jsonschema`.** `scripts/task-output-keys.sh` (used by `create-task.sh` and `validate-output.sh`) parses `assets/schemas.yaml` with PyYAML; `validate-output.sh` deep-validates each task's `output_json` against the compiled schemas in `assets/compiled/` with jsonschema, and hard-fails (exit 5) without it. + - Probe with `python3 -c 'import yaml, jsonschema'`. If it succeeds, skip. + - Otherwise install what's missing: `python3 -m pip install --user pyyaml jsonschema` (or the platform equivalent, e.g. `apt-get install python3-yaml python3-jsonschema`). Re-probe; if it still fails, abort and ask the user. + +5. **Verify the staleness check works.** - Run `scripts/summary-check.sh`. It hashes the sorted IDs of currently-open issues and compares against `summary.md`'s frontmatter. Backend-agnostic — beads can use whichever storage it likes. - Requires `jq` on PATH; if missing, install it (`brew install jq`, `apt-get install jq`, etc.) and retry. - At init time `summary.md` does not yet exist, so the script will print `status: missing` and exit 1 — that's fine; **update-summary** will create the file later. `status: no-tools` (exit 3) means abort and ask the user. -5. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**. +6. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**. 
## Cross-machine transfer diff --git a/plugins/asta-preview/skills/research-step/workflows/plan.md b/plugins/asta-preview/skills/research-step/workflows/plan.md index c5ffb2d..444ee90 100644 --- a/plugins/asta-preview/skills/research-step/workflows/plan.md +++ b/plugins/asta-preview/skills/research-step/workflows/plan.md @@ -1,99 +1,94 @@ # Workflow: plan -Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state: +Create or extend the research graph. The flow chains live in `assets/schemas.yaml` (`flows`) — plan reads them, it does not hardcode the sequence. Two modes: -- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier (scope, definitions, literature_review) from `mission.md`. -- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction. +- **bootstrap** — no epic yet: pick a flow and lay its first step(s). +- **replan** — an epic exists: after a step closes, add the next step(s) in its flow chain. -Always chains to **update-summary** afterward so `summary.md` reflects the new graph. +Always chains to **update-summary** afterward. ## Preconditions -- `bd` is installed and `.beads/` is initialized. If not, run **init** first. -- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one. -- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`. +- `bd` installed and `.beads/` initialized (else run **init**). +- **bootstrap**: `mission.md` exists; no epic yet (`scripts/epic-root.sh` → `none`). 
+- **replan**: an epic exists; either `execute` supplied the closed source task, or the user named what to extend. -## Issue metadata convention +## Task metadata -Every task issue carries: +Create task leaves with `scripts/create-task.sh <parent> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]`. It sets `metadata.research_step = {flow, task_type, inputs, output_schema_version, output_json: null, output_markdown: null}` and a **brief one-line `description`** (it rejects a missing, multi-line, or over-long description). `execute` later publishes `output_json` (the structured result) and `output_markdown` (the narrative) via `close-task.sh`; the description is not overwritten. The epic carries `epic_root: true`; group nodes (loops, fan-outs, branches) are epics created with `bd create --parent <parent> -t epic` (no task_type, no description rules). A session may run several flows — the flow is per task, not per epic. -```json -{ - "research_step": { - "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis>", - "inputs": ["bd-xxxx", "bd-yyyy"], - "output_schema_version": 1, - "output": null - } -} -``` +## Indentation is the tree -The mission epic additionally carries `epic_root: true`. +The flow in `assets/schemas.yaml` is an indented outline, and the beads graph you build **is that same outline**: each indentation level in the flow becomes one parent-child level in beads. Build it with `bd create --parent`, walking the flow top-down, so hierarchical ids (`wf`, `wf.1`, `wf.1.1`, …) encode the outline position. There are **no `blocks`/`deps` edges** — ordering is the id order, because you create nodes in the order they run. -## Mode selection +Reading a flow node: -1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**. -2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. 
Else, ask the user which closed task to plan around or which subgraph to extend, then proceed. +- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`. Its `input:` names the upstream steps in this session whose issues you wire as the task's `inputs` (the same task type takes different inputs in different flows, so inputs live on the step, not the task). +- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission`, `input`, and `chain` are never nodes. +- A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree. +- A **fan-out group** (`replication`, `theory_generation`, `verification`, `testing`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on. -## Bootstrap mode +The reproduction flow therefore produces this tree (ids illustrative; `[group]` nodes are epics, leaves are tasks): -1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**. -2. **Create the epic.** - ``` - bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)" - bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}' - ``` -3. **Create the initial frontier.** Three `task` issues with the metadata convention above: - - `scope: <one-line>` — `inputs: []` - - `definitions: <one-line>` — `inputs: [<scope-id>]` - - `literature_review: <one-line>` — `inputs: [<scope-id>, <definitions-id>]` -4. **Add edges.** - - `parent-child` from each frontier task to the epic - - `blocks`: scope → definitions; scope → literature_review; definitions → literature_review -5. **Report.** Print the epic ID and the three task IDs. 
+``` +wf [epic] <mission> + wf.1 [loop] reproduction + wf.1.1 data_driven_discovery + wf.1.2 law_extraction + wf.1.3 evidence_gathering + wf.1.4 [fan-out] replication one branch per law + wf.1.4.1 [branch] <law> + wf.1.4.1.1 experiment_design + wf.1.4.1.2 analysis + wf.1.4.1.3 audit + wf.1.4.1.4 adjudicate + wf.1.4.2 [branch] <law> … + wf.1.5 reproduction_synthesis +``` -## Replan mode +The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproduction, `wf.3` theorizer, `wf.4` verification (one branch per testable theory), `wf.5` verification_synthesis, `wf.6` gap_synthesis, `wf.7` final_synthesis. Each sub-flow ends in its own synthesis step that emits a report (provenance_report, reproduction_report, theory_report, verification_report); gap_synthesis aggregates their gaps into data_gaps_report and final_synthesis writes the theory-led research_report. -Read the source task's task_type and output: +## Ordering and closing (no edges) -``` -bd show <source-id> --json | jq '.[0].metadata.research_step.task_type' -bd show <source-id> --json | jq '.[0].metadata.research_step.output' -``` +- **Next task = the `next:` line of `scripts/next-task.sh`** (open issues with a `task_type`, **numerically** sorted by hierarchical id — `wf.1.2` before `wf.1.10`). Groups (no `task_type`) are never executed. `execute` and `update-summary` both use this script, so they never disagree about what runs next. +- Because you create in execution order, sequential steps sort in the order they run; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last. +- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. It never closes the **epic root**: "root open, no open tasks" is the session-complete state. Do not close groups by hand; the one exception is a group that `close-task.sh` warns it could not auto-close.
+ +## Static vs data-dependent fan-outs + +- **Static** (`theory_generation` by objective): both branches are known up front → create them together. +- **Data-dependent** (`replication` per law, `verification` per testable theory, `testing` per hypothesis): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`, `hypothesis_formation`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it. + +## Gates (replan) -Apply this table: +- When `experiment_design` closes (a `replication` or `testing` branch): `feasibility` of `feasible`/`proxy_only` → create the branch's remaining steps — in `testing`, also `data_acquisition` when the design names data not yet in hand — i.e. `[data_acquisition,] analysis`, `audit`, `adjudicate`; `data_unavailable`/`construct_mismatch` → create only `adjudicate` (it records `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created. +- When `testability_triage` closes: create a `verification` branch only per theory in `testable_theory_ids`; the rest become `next_steps` in the final report. +- When `hypothesis_formation` closes: create one `testing` branch per hypothesis. -| Source task_type | Action | -|---|---| -| `literature_review` | For each gap in `output.gaps`, create a `hypothesis` task with `inputs: [<scope-id>, <source-id>]`. Edges: `parent-child` to epic; `blocks` from the source. **Populate `metadata.research_step.output` at creation time** (see below) and close the issue immediately — the gap text already contains the statement, rationale, and prediction in prose, so there is no separate `execute` pass for hypotheses. 
| -| `hypothesis` | Create the chain `experiment_design` → `evidence_gathering` → `analysis`, each `blocks` the next. `experiment_design` depends on the hypothesis (via `inputs`); `analysis` depends on both the hypothesis and the new `evidence_gathering`. All three get `parent-child` to the epic. | -| `analysis` | If every `hypothesis` in the epic now has a closed `analysis`, create one `synthesis` task with `inputs` listing all analysis IDs and the scope ID. `parent-child` to epic; `blocks` from each analysis. Otherwise no-op. | -| `synthesis` | If `output.open_questions` is non-empty, **stop and ask the user** before creating new `hypothesis` tasks. If approved, create them with a `discovered-from` edge back to the synthesis (in addition to the usual edges). | -| `scope`, `definitions`, `experiment_design`, `evidence_gathering` | No replan. Report no-op and stop. | +## Bootstrap -If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop. +1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear. +2. **Resolve the session config.** Start from the `config:` defaults in `assets/schemas.yaml`; apply any overrides from a `## Config` section in `mission.md` (one `key: value` line each; unknown keys are an error — surface them). The resolved map is pinned in the next step and never re-resolved mid-session. +3. `bd create -t epic` the root from the mission, tagged with metadata `{"research_step": {"epic_root": true, "flow": "<flow>", "config": {<resolved config>}}}`. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation. +4. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). 
**No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close. +5. Report the epic id, the flow, the resolved config, the loop/group ids, and the frontier task ids. -### Auto-resolving hypothesis tasks +## Replan -When creating a `hypothesis` from a literature_review gap: +When a step closes, create the next node(s) under their parent, in flow order: -1. Derive the four output fields directly from the gap text and surrounding `literature_review` output (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`): - - `statement` — `H_n: <one-sentence claim>` - - `rationale` — why this gap implies the claim - - `falsifiable_prediction` — what observation would refute it - - `expected_evidence` — list of concrete evidence types that would support it -2. Validate with `scripts/validate-output.sh hypothesis <metadata-json-file>` before persisting. -3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`. +- Create each step with `create-task.sh`. Its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling; the step's `input:` list in `schemas.yaml` names **which** upstream steps to wire. +- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the branch steps under each via `create-task.sh` — **but a gated group lays only the steps up to its gate**: under a `replication` or `testing` branch create only `experiment_design`; the **Gates** rules above create the rest when `experiment_design` closes. Ungated branches (`verification`: analysis, audit, adjudicate; `theory_generation`: theory_formation) get all their steps at branch creation. Record why for any branch the data can't support, rather than skipping it. +- Apply the **Gates** rules above — they are the only creator of post-gate steps, so nothing is double-created. 
+- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `hypothesis_synthesis`, `discovery_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape. -If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted. +Stop at the end of the flow. If the closed step has nothing downstream, report no-op. ## After either mode -Hand off to **update-summary** so `summary.md` reflects the new state. +Hand off to **update-summary**. There are no edges to verify — the parent-child tree is the whole structure. ## Out of scope -- Running tasks or producing outputs. That belongs to **execute**. -- Environment setup (installing `bd`/`jq`, `bd init`). That belongs to **init**. -- Editing `mission.md`. That belongs to **brainstorm**. -- Validating output quality. +- Running tasks or producing outputs (**execute**). +- Environment setup (**init**); editing `mission.md` (**brainstorm**); judging output quality. diff --git a/plugins/asta-preview/skills/research-step/workflows/update-summary.md b/plugins/asta-preview/skills/research-step/workflows/update-summary.md index a79f6ff..a96a9fa 100644 --- a/plugins/asta-preview/skills/research-step/workflows/update-summary.md +++ b/plugins/asta-preview/skills/research-step/workflows/update-summary.md @@ -15,13 +15,11 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both). 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`. -3. 
**Gather state inline.** All you need to fill the template comes from a few `bd` queries: - - `bd list --json` for the full tree (issue_count, status partition). - - `bd ready --json` for the ready list (also drives the Next Steps section). - - `bd blocked --json` for the blocked count. - Project each list to `{id, task_type: .metadata.research_step.task_type, title}` with `jq` and partition by `.status`. +3. **Gather state inline.** + - `bd list --json --all --limit 0` for the full tree — `--all` because closed issues carry the results, `--limit 0` because bd truncates at 50 rows by default. Project to `{id, task_type: .metadata.research_step.task_type, title, status}` and partition by `.status`. + - `scripts/next-task.sh` for the **next task and the queue** (open task-type issues, numerically sorted by id — the same order `execute` uses). This replaces `bd ready`; there are no edges, so id order is the ordering signal. 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`. -5. **Overwrite `summary.md`** using this template: +5. **Overwrite `summary.md`** using this template (sections come from the taxonomy defined in `assets/schemas.yaml` — flows, laws, theories, reports — not from any per-flow hardcoding; render what the closed tasks' `output_json` actually contains): ```markdown --- @@ -29,7 +27,7 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is beads_epic: <bd-id> generated_at: <ISO-8601 UTC> issue_count: <n> - ready_count: <n> + open_task_count: <n> --- # <mission title> @@ -37,37 +35,29 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is ## Mission <verbatim mission.md, or one-paragraph summary if long> - ## Research Question & Scope - <from scope issue's output, or "pending" if not yet closed> + ## Flow + <one line per flow this session runs (from task metadata `flow`), with where it + stands — e.g. 
"reproduction — replication branches 2/5 closed, synthesis pending"> - ## Operational Definitions - <from definitions issue's output> + ## Results so far + <one subsection per closed `*_synthesis` task: the report's `headline` plus a link + to its `report_path`. Before any synthesis has closed, instead give one bullet per + closed task: "<bd-id> [<task_type>]: <one-line outcome from output_json>" — e.g. + laws extracted, datasets acquired, theories formed, verdicts finalized.> - ## Related Work - <literature_review.output.key_findings as bullets; link to summary_path> - - ## Hypotheses - <one subsection per hypothesis issue: "H_n: <statement>" plus current verdict from its analysis if closed> - - ## Experimental Designs - <one subsection per experiment_design, grouped under its hypothesis> - - ## Results Summary - <table: hypothesis | verdict | confidence | analysis-id> - - ## Open Questions - <synthesis.output.open_questions if synthesis exists, else aggregated from in-flight notes> + ## Gaps + <the `gaps` rows from closed report outputs (item — missing_data — severity), + or "none recorded"> ## Status - Closed: <n> - In progress: <n> — IDs: <list> - - Ready: <n> — IDs: <list> - - Blocked: <n> + - Open tasks: <n> — next: <`next:` from next-task.sh>; queue: <`queue:` line> ### Next Steps - <from `bd ready --json`: one bullet per ready issue, formatted as + <the queue from next-task.sh in order, one bullet each: "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>". - If `bd ready` is empty, write "No ready tasks — graph is blocked or complete."> + If next-task.sh prints `next: none`, write "No open tasks — flow complete."> ``` 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.) @@ -79,4 +69,4 @@ Any reader (human or agent) checks freshness by running `scripts/summary-check.s ## Out of scope for this workflow - Mutating beads. 
`update-summary` is read-only against `.beads/`. -- Re-planning. Even if `bd ready` is empty and the graph is incomplete, `update-summary` does not create issues. +- Re-planning. Even if no open tasks remain and the graph is incomplete, `update-summary` does not create issues. diff --git a/plugins/asta/skills/research-step/SKILL.md b/plugins/asta/skills/research-step/SKILL.md index 0d2fcee..e9f9a8c 100644 --- a/plugins/asta/skills/research-step/SKILL.md +++ b/plugins/asta/skills/research-step/SKILL.md @@ -1,12 +1,12 @@ --- name: research-step description: Plan and execute autonomous research as a graph of typed tasks tracked in beads. Use when working from a mission.md to drive multi-step research with explicit dependencies and structured outputs. -allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*) +allowed-tools: Bash(bd:*) Bash(date:*) Bash(scripts/*) Bash(asta:*) Read(assets/**) Read(workflows/**) Read(scripts/**) Skill(asta:*) Skill(asta-preview:*) Skill(asta-plugins:*) --- # Research Step -Models a research session as a beads epic. Each unit of work is a typed sub-issue whose `metadata.research_step.output` matches a JSON schema in `assets/schemas.yaml`. +Models a research session as a beads epic. 
A session runs a **flow** — the composed `data_and_literature_grounded_theory_generation` (which begins with `data_provenance`), its sub-flows `reproduction` and `theorizer`, the standalone `hypothesis_driven_research` flow (literature → falsifiable hypotheses → one prespecified test per hypothesis), the standalone `auto_discovery` flow (source a cohort and run a fresh discovery; run it as its own session in a **separate workspace** — own `mission.md` and `.beads` — typically kicked off after a theory-generation run; a second epic root in the same workspace breaks `scripts/epic-root.sh`), or a custom chain (each flow's purpose is in its `mission` field in `assets/schemas.yaml`). `assets/schemas.yaml` defines the reusable `types` (immutable records — verdicts are `adjudication` records referencing their subject), the `tasks` (pure output contracts mapping each output key to its type), and the `flows` (each step carrying its `mission`, its `input` steps, and its asta `chain`). Each unit of work is a typed sub-issue whose `metadata.research_step.output_json` matches its task's output in the schema; the issue envelope carries `flow` and `task_type`. This skill is a **router**. Inspect the working directory and the user's request, pick one workflow, then read its `.md` file in `workflows/` and follow it. Do not execute a workflow from memory — always open the file first. @@ -23,7 +23,7 @@ Installing `bd` and `jq`, running `bd init`, and verifying `scripts/summary-chec | `mission.md` | Input. The research task. | | `.beads/` | Source of truth for state. | | `summary.md` | Derived view of the session, regenerated by **update-summary**. Beads is the source of truth; this file is just a digest for humans and for **brainstorm**. Frontmatter `beads_snapshot` records the state it was rendered from. | -| `background_knowledge.txt` | Optional. Long-form context referenced from issue metadata via `summary_path`. 
| +| `.asta/<agent>/<slug>/` | Heavy artifacts (raw agent JSON, datasets, reports), referenced from `output_json` by repo-root-relative `_path` fields. | ## Workflows @@ -51,7 +51,7 @@ If the user did not name a workflow, run **brainstorm**. It inspects the working - **init** → always run **plan** afterwards (which then chains to **update-summary**). - **plan** → always run **update-summary** afterwards so the digest reflects the new graph. -- **execute** → if the closed task type is `literature_review`, `hypothesis`, `analysis`, or `synthesis`, chain to **plan** (which chains to **update-summary**); otherwise chain directly to **update-summary**. +- **execute** → chain to **plan** when the closed task type unlocks new structure for its flow (see the hand-off rule in `execute.md`, last step); otherwise chain directly to **update-summary**. - **update-summary** and **brainstorm** → never chain. ## Boundaries diff --git a/plugins/asta/skills/research-step/assets/schemas.yaml b/plugins/asta/skills/research-step/assets/schemas.yaml index b840628..b5ead12 100644 --- a/plugins/asta/skills/research-step/assets/schemas.yaml +++ b/plugins/asta/skills/research-step/assets/schemas.yaml @@ -1,80 +1,638 @@ -# Output schemas for research-step task types. -# Each task issue stores its realized output at metadata.research_step.output, -# matching the shape under `output:` for its task_type. 
- -schema_version: 1 - -task_types: - - scope: - inputs: [] - output: - question: string # the precise research question - boundaries: [string] # what is in / out of scope - success_criteria: [string] # how we know we have answered it - - definitions: - inputs: [scope] - output: - terms: - - name: string - operational_definition: string - rationale: string - - literature_review: - inputs: [scope, definitions] - output: - summary_path: string # relative path; long-form context - key_findings: [string] # 3-10 bullets readable without opening summary_path - gaps: [string] # gaps that motivate hypotheses - citations: - - id: string - title: string - url: string - relevance: string - - hypothesis: - inputs: [scope, literature_review] - output: - statement: string # H_n: ... - rationale: string - falsifiable_prediction: string - expected_evidence: [string] - - experiment_design: - inputs: [hypothesis] - output: - method: string - procedure: [string] # ordered steps - variables: - independent: [string] - dependent: [string] - controls: [string] - artifacts_expected: [string] # paths the gathering step will produce - - evidence_gathering: - inputs: [experiment_design] - output: - artifacts: - - path: string - kind: string # data | log | figure | code | other - description: string - log_path: string # what was actually run - deviations: [string] # ways execution diverged from design - - analysis: - inputs: [hypothesis, evidence_gathering] - output: - verdict: enum [supported, refuted, inconclusive] - confidence: number # 0.0 - 1.0 - reasoning: string - caveats: [string] - - synthesis: - inputs: [scope, analysis_*] # all analysis issues in the epic - output: - answer: string # answer to scope.question - supporting_hypotheses: [bd_id] - refuted_hypotheses: [bd_id] - open_questions: [string] # become discovered-from edges on re-plan - report_path: string # generated markdown report +version: 2 + +config: + # Session-tunable knobs and their defaults. 
A mission.md may override any of + # them in a `## Config` section (one `key: value` line each). plan's bootstrap + # resolves defaults + mission overrides and pins the result on the epic root + # (metadata.research_step.config); execute reads the pinned values from the + # epic root and passes them into the chain commands. Names match the field the + # consuming agent actually takes. + n_experiments: 10 # auto-ds: experiments per discovery run; set in the run-metadata + # JSON given to `asta autodiscovery metadata` (data_driven_discovery + # fresh runs, cohort_assembly/discovery_run) + max_papers_to_retrieve: 30 # generate-theories find-and-extract: papers to extract from + # (provenance_extraction, evidence_extraction, hypothesis_formation) + max_parallel_dv_runs: 5 # cap on concurrent DataVoyager (analyze-data) submissions when a + # step fans out runs in parallel (holdout_replication, analysis + # batches); submit up to this many, then wait before submitting more + +enums: + outcome: [held, partial, failed, underpowered, n/a] # the one verdict vocabulary, for laws, theories, and hypotheses + testability: [tested, proxy_only, untestable] + construct_equivalence: [equivalent, proxy, mismatch] + feasibility: [feasible, proxy_only, data_unavailable, construct_mismatch] + independence_axis: [region, instrument, method, construct, temporal, population] + generation_objective: [accuracy_focused, novelty_focused] + subject_kind: [empirical_law, theory, hypothesis] + novelty: [established, derivable, genuinely_new] + support_level: [supports, mixed, contradicts, inconclusive] + priority: [high, medium, low] + access_status: [acquired, open_unfetched, restricted, not_found] + +types: + + # Records are immutable: a task emits a record once; later stages never re-emit + # it with new values. Verdicts, enrichments, and acquisition results are their + # own records referencing the original by id (adjudication -> subject_id, + # source_access/acquisition -> data_source_id). 
+ # + # Agent outputs nest VERBATIM: when a type carries another agent's record + # (theory.components, experiment rows, mcts_provenance), the agent's object is + # stored unmodified under its key - orchestrator annotations wrap it, never + # reach into or rename inside it - so a real agent payload always slots in. + # validate-output.sh deep-validates against the compiled JSON Schemas + # (assets/compiled/, regenerated by scripts/compile-schemas.py at build time): + # top-level output keys are closed, but nested objects stay open, so extra + # nested fields from real payloads are always permitted. A field name ending + # in `?` (e.g. mcts_provenance?) is optional; unmarked fields are required. + + # --- Artifacts. The `artifacts` key on every task holds A2A 1.0 Artifacts, + # exactly as the spec defines them: an artifact is an array of typed `parts` + # (wire field names, camelCase). A2A artifacts returned by chain commands are + # stored as received; locally produced byproducts (a rendered figure, a script, + # a data file) are wrapped in the same shape as file parts. Conventions on top + # of the spec: + # - agents tag the artifact kind in metadata.type, e.g. extraction-schema | + # extraction | theory | novelty | theory_store (theorizer) · + # paper-finder-search-result · widget_data_voyager (DV); local byproducts + # use figure | code | data | log | experiment-design. + # - local files are file parts in the *uri* form, uri = repo-root-relative + # path under .asta/<agent>/<slug>/, with a mimeType (image/png, + # text/x-python, text/csv, text/markdown, ...). + # - never put the *bytes* form in output_json - beads caps metadata at ~64KB; + # base64 payloads from agents (e.g. DV figures) are written to disk first + # and referenced by uri. + # Byproducts always travel this channel; a thing the contract *requires* + # (e.g. an analysis's figures) is a typed output key. + + artifact: # A2A 1.0 Artifact, verbatim + artifactId: string # unique within the task (e.g. 
UUID, or <issue-id>-<n> for local byproducts) + name: string + description: string + parts: [part] + metadata?: object # optional; metadata.type carries the artifact kind + extensions?: [string] # optional; URIs of relevant A2A extensions + + part: # A2A Part union, discriminated by `kind` + kind: string # text | file | data + metadata?: object # optional, per part + # text: {kind: text, text: string} + # file: {kind: file, file: {uri: string, mimeType: string, name: string}} - the only file form allowed in output_json + # {kind: file, file: {bytes: base64, mimeType: string, name: string}} - wire/disk only, never in output_json + # data: {kind: data, data: object} - structured payloads, stored as received + + figure: # the report-embedding form: image is a repo-root-relative path + caption: string # (PNG/SVG), embedded via ![caption](path) + image: string + + experiment: # an auto-ds experiments.json record; these four fields are the + experiment_id: string # required projection - paste the full record in unchanged (extras + status: string # like experiment_plan, code, review, prior/posterior beliefs are + hypothesis: string # permitted and preserved) + analysis: string + + empirical_law: # identity of a discovered law; its verdict lives in the + id: string # adjudication that references it, never here + statement: string + construct: string + source_operationalization: string + source_node: string + effect_size_source: string # the effect size as the source run/paper claims it + grouping_rationale: string + mcts_provenance?: {surprise: number, is_surprising: boolean, prior_belief: object, posterior_belief: object} # optional; the auto-ds experiment record's search-signal fields, verbatim + + dataset: + id: string + definition: string + source: string + n: number + sampling: string + variables: [string] + covers_laws: [string] + + data_source: # the paper behind a run dataset; emitted once by provenance_search + id: string + dataset_id: string # which run dataset 
this sources (e.g. ds_alaska_elas) + paper_id: string # source paper (Semantic Scholar sha / corpus id) + paper_title: string + paper_url: string + + source_access: # provenance_extraction's enrichment, keyed by data_source id + data_source_id: string + data_availability: string # the paper's data-availability statement, verbatim or summarized + repository: string # e.g. RGI, Zenodo, USGS ScienceBase, PANGAEA + identifier: string # DOI / accession / direct URL for the data + + acquisition: # data_acquisition's result, keyed by data_source id + data_source_id: string + access_status: access_status # acquired | open_unfetched | restricted | not_found + local_path: string # repo-root-relative path once acquired (else empty) + dataset_id: string # the dataset registered from this source (empty if not acquired) + validation_note: string # QC against the paper - n, schema/variables, units, missingness - or why not validated + + cohort: # the data a fresh auto-ds discovery runs against (auto_discovery flow) + id: string + research_question: string # the intent the discovery runs against (from mission.md) + inclusion_criteria: string + exclusion_criteria: string + sampling: string + source_data_sources: [string] # data_source ids the cohort was assembled from + discovery_subset: {definition: string, n: number, path: string} # what discovery sees + holdout_subset: {definition: string, n: number, path: string} # independent, held back for replication + run_id: string # the stood-up auto-ds run (autodiscovery create) + + experiment_design: # one test, committed before its analysis runs; used by the + subject_kind: subject_kind # replication (law) and testing (hypothesis) branches + subject_id: string # the law / theory / hypothesis under test + experiment_name: string + plain_language_description: string + source_operationalization: string # how the source measured it (empty for a novel hypothesis) + independent_operationalization: string + construct_equivalence: 
construct_equivalence + feasibility: feasibility + required_data: string + data_gap: string + experiment_design_query: string # the natural-language query sent to the experiment designer (input provenance; empty when no designer ran) + prespecified: # the commitment adjudicate checks the result against + test: string # the statistical test / model + metric: string # the quantity that decides it + success_threshold: string # what counts as held, incl. direction; note expected power / min detectable effect if known + + analysis: # DataVoyager's TaskSummary, verbatim (figures are hoisted to the + final_answer: string # task's `figures` output key after imageb64 -> PNG conversion) + assumptions: string # a single text block, as the agent emits it + code: string + + audit_report: + subject_id: string # the law / theory / hypothesis whose analysis was audited + challenges: [{concern: string, check: string, outcome: string}] # include one negative-control check (e.g. shuffled predictor) + artifacts_found: [string] + verdict_survives: boolean + recommended_adjustment: string + + adjudication: # the verdict record; references its subject, never mutates it + subject_kind: subject_kind + subject_id: string + outcome: outcome # held | partial | failed | underpowered | n/a + testability: testability + effect_size_observed: string + prespecified_check: string # the observed metric vs the committed success_threshold + independence_axes: [independence_axis] + data_used: string + evidence: string + + extracted_data: + id: string + run_id: string + paper_id: string + extraction_schema_id: string + rows: + - name_short: string + name_full: string + brief_description: string + citation_title: string + uuid: string + + literature_review: # hypothesis_driven_research's survey output + summary: string + key_findings: [{text: string, uuids: [string]}] + open_gaps: [string] # gaps that motivate hypotheses + citations: [{id: string, corpus_id: number, title: string, url: string, relevance: 
string}] # corpus_id = canonical S2 corpusId; rows convert mechanically to PaperEntry seeds + + hypothesis: # a slim, directly testable claim (hypothesis_driven_research) + id: string + statement: string + rationale: string # why the literature implies it + falsifiable_prediction: string + grounds: [{text: string, uuids: [string]}] # the evidence the rationale rests on + + theory: + id: string + name: string + description: string + theory_query: string + objective: generation_objective # orchestrator annotation (the generation branch); the agent's own copy is components.generation_objective + grounds_law_ids: [string] # orchestrator annotation - which laws ground this theory (no agent equivalent) + supporting_evidence_ids: [string] # orchestrator annotation + components: # the theorizer's theory record, carried VERBATIM - never flatten or edit + generation_objective: string # the agent's value as emitted (e.g. accuracy-focused) + theory_statements: + - statement_name: string + theory_statement: string + supporting_evidence: [{text: string, uuids: [string]}] + conflicting_evidence: [{text: string, uuids: [string]}] + new_predictions_likely: [string] + new_predictions_unknown: [string] + unaccounted_for: [{text: string, uuids: [string]}] + + testability_triage: + assessments: + - theory_id: string + testable_now: boolean + available_data: string + required_data: string + proposed_test: {test: string, metric: string, success_threshold: string} # prespecified; the verification branch's adjudicate checks against it + gap: string + testable_theory_ids: [string] + + theory_evaluation: + id: string + theory_id: string + novelty: novelty # rollup across statement_evaluations - the most novel statement wins + overall_support: support_level + overall_support_raw?: string # the agent's untyped judgment, verbatim (optional) + explanation: string + statement_evaluations: # the agent's real granularity - novelty is scored per statement + - statement_index: number + novelty: 
novelty + explanation: string + + next_run_proposal: + kind: string # any flows: or tasks: key in this file + title: string + tests: [string] + data_needed: string + expected_signature: string + priority: priority + + # --- Synthesis reports. One per sub-flow (provenance_report, reproduction_report, + # theory_report, verification_report, hypothesis_report, discovery_report), one + # standalone data-gaps report, and a theory-led master (research_report). Each + # carries report_path (the .md deliverable written first), a title, a one-line + # headline, a typed body, and `links` back to the artifacts, tasks, and papers it + # rests on. Each sub-flow report exposes a local `gaps` list that gap_synthesis + # aggregates into the data_gaps_report. + + provenance_report: + report_path: string + title: string + headline: string + sources: + - dataset_id: string + paper_title: string + paper_url: string + repository: string + access_status: access_status + local_path: string + method_note: string # how sources were matched and the data merged/validated (e.g. 
join key, resulting n vs the run's n) + acquired: [string] + not_acquired: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + reproduction_report: + report_path: string + title: string + headline: string + method_note: string + laws_ledger: + - law_id: string + statement: string + outcome: outcome + testability: testability + effect_size_source: string + effect_size_observed: string + independence_axes: [independence_axis] + evidence: string + what_held: [string] + what_failed_or_untestable: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + theory_report: + report_path: string + title: string + headline: string + mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]} + theories: + - theory_id: string + name: string + objective: generation_objective + one_line: string + grounds_law_ids: [string] + novelty: novelty + testable_now: boolean + supporting_evidence_ids: [string] + novelty_summary: string + new_predictions: [string] + open_threads: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + verification_report: + report_path: string + title: string + headline: string + novelty_by_verification: + - theory_id: string + claim: string + novelty: novelty + outcome: outcome + effect_size: string + data_used: string + audit_survived: boolean + what_was_tested: string + what_could_not_be_tested: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + hypothesis_report: # synthesis output of the hypothesis_driven_research flow + report_path: string + title: string + headline: string + question: string # 
the research question from mission.md + ledger: + - hypothesis_id: string + statement: string + outcome: outcome + effect_size_observed: string + evidence: string + answer: string # what the verdicts say about the question + open_questions: [string] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + + data_gaps_report: + report_path: string + title: string + headline: string + gaps: + - item: string + missing_data: string + blocks: string + severity: priority + arose_in: string + next_steps: [next_run_proposal] + figures: [figure] + links: [{label: string, ref: string}] + + research_report: + report_path: string + title: string + headline: string + mechanism: {statement: string, grounded_in: [string], supporting_evidence: [string], conflicting_evidence: [string]} + theory_highlights: + - theory_id: string + claim: string + novelty: novelty + outcome: outcome + inference_chain: [{claim: string, chain: [string]}] + what_was_done: [string] + sub_reports: [{kind: string, report_path: string, one_line: string}] + tensions_and_surprises: [{observation: string, where: string, evidence: string}] + figures: [figure] # at least the one decisive figure, embedded in the report + links: [{label: string, ref: string}] + + discovery_report: # synthesis output of the auto_discovery flow + report_path: string + title: string + headline: string + run_id: string # the discovery run, with its cohort sizes in the report header + laws: + - law_id: string + statement: string + surprise: number # the discovery run's surprise signal for this candidate law + outcome: outcome # from the held-out replication (untested branches are n/a) + deciding_experiment: string # the held-out DataVoyager run/analysis that decided the verdict + effect_size_discovery: string # on the discovery subset + effect_size_holdout: string # on the held-out subset - the pair shows replication shrinkage + interpretation: 
string # what the run means against the question that motivated it + next_steps: [next_run_proposal] + figures: [figure] + gaps: [{item: string, missing_data: string, blocks: string, severity: priority}] + links: [{label: string, ref: string}] + +# Tasks are pure output contracts: output maps each output_json key to its type, +# [type] meaning a JSON array of that type. Every task also carries artifacts. +# A task's inputs are declared per flow step (the same output shape takes +# different inputs in different flows), under `input:` in the flows below. + +tasks: + provenance_search: {output: {data_sources: [data_source], artifacts: [artifact]}} + provenance_extraction: {output: {extracted_data: extracted_data, source_access: [source_access], artifacts: [artifact]}} + data_acquisition: {output: {acquisitions: [acquisition], datasets: [dataset], artifacts: [artifact]}} + provenance_synthesis: {output: {provenance_report: provenance_report, artifacts: [artifact]}} + data_driven_discovery: {output: {experiments: [experiment], datasets: [dataset], artifacts: [artifact]}} + law_extraction: {output: {empirical_laws: [empirical_law], artifacts: [artifact]}} + evidence_gathering: {output: {datasets: [dataset], artifacts: [artifact]}} + experiment_design: {output: {experiment_design: experiment_design, artifacts: [artifact]}} + analysis: {output: {analysis: analysis, figures: [figure], artifacts: [artifact]}} + audit: {output: {audit_report: audit_report, artifacts: [artifact]}} + adjudicate: {output: {adjudication: adjudication, artifacts: [artifact]}} + reproduction_synthesis: {output: {reproduction_report: reproduction_report, artifacts: [artifact]}} + evidence_extraction: {output: {extracted_data: extracted_data, artifacts: [artifact]}} + theory_formation: {output: {theories: [theory], artifacts: [artifact]}} + testability_triage: {output: {testability_triage: testability_triage, artifacts: [artifact]}} + novelty_assessment: {output: {theory_evaluations: 
[theory_evaluation], artifacts: [artifact]}} + theory_synthesis: {output: {theory_report: theory_report, artifacts: [artifact]}} + verification_synthesis: {output: {verification_report: verification_report, artifacts: [artifact]}} + gap_synthesis: {output: {data_gaps_report: data_gaps_report, artifacts: [artifact]}} + final_synthesis: {output: {research_report: research_report, artifacts: [artifact]}} + # hypothesis_driven_research flow + literature_review: {output: {literature_review: literature_review, artifacts: [artifact]}} + hypothesis_formation: {output: {hypotheses: [hypothesis], artifacts: [artifact]}} + hypothesis_synthesis: {output: {hypothesis_report: hypothesis_report, artifacts: [artifact]}} + # auto_discovery flow (its own session in a separate workspace: source a cohort, run a fresh discovery, replicate on held-out data) + cohort_assembly: {output: {cohort: cohort, datasets: [dataset], artifacts: [artifact]}} + discovery_run: {output: {experiments: [experiment], empirical_laws: [empirical_law], artifacts: [artifact]}} + holdout_replication: {output: {adjudication: adjudication, figures: [figure], artifacts: [artifact]}} + discovery_synthesis: {output: {discovery_report: discovery_report, artifacts: [artifact]}} + +# Each flow step carries: mission (what the work is), input (the upstream steps +# in this session whose issues plan wires as the task's inputs), and chain (the +# asta commands). A node with a chain is a step; a node with only child nodes +# and a mission is a group; a chain item {workflow: <flow>, mission: <text>} +# expands the named sub-flow inline. A group whose branches are created at +# replan (one per law / theory / hypothesis, once the naming step closes) +# declares `replan: true`. 
+ +flows: + + data_and_literature_grounded_theory_generation: + mission: Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report. + data_provenance: + mission: Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand. + chain: + - {workflow: data_provenance, mission: Source the papers and datasets the run named in the mission was built on; acquire the open data and record what is restricted.} + reproduction: + mission: Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data. + chain: + - {workflow: reproduction, mission: Import the run named in the mission; reproduce each law on independent data with construct-equivalence and a feasibility gate.} + theorizer: + mission: Generate literature- and data-grounded theories of the reproduced laws and score their novelty. + chain: + - {workflow: theorizer, mission: Ground theories in the reproduced laws under two objectives; triage what is testable on hand-data; score novelty on the testable subset.} + verification: + mission: One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan. + replan: true + analysis: + mission: Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. 
Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [testability_triage, data_driven_discovery, evidence_gathering] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the verification analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. + input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id. + input: [testability_triage, analysis, audit] + chain: [] + verification_synthesis: + mission: Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether the audit survived), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`. + input: [verification, novelty_assessment] + chain: [] + gap_synthesis: + mission: Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. This is the single place data and rigor gaps live; the master report only links to it. 
+ input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis] + chain: [] + final_synthesis: + mission: Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them. + input: [provenance_synthesis, reproduction_synthesis, theory_synthesis, verification_synthesis, gap_synthesis] + chain: [] + + data_provenance: + mission: Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed. + provenance_search: + mission: Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url). 
+ input: [] + chain: [asta literature find, asta papers search] + provenance_extraction: + mission: Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable. + input: [provenance_search] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + data_acquisition: + mission: For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. For restricted or not-found data, record a gap rather than blocking downstream work. + input: [provenance_search, provenance_extraction] + chain: [asta documents, asta autodiscovery upload] + provenance_synthesis: + mission: Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note. 
+ input: [provenance_search, provenance_extraction, data_acquisition] + chain: [] + + reproduction: + mission: Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held, partial, failed, underpowered, or n/a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run. + data_driven_discovery: + mission: Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the "data in hand" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted. + input: [] + chain: [asta autodiscovery run, asta autodiscovery experiments] + law_extraction: + mission: Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication. + input: [data_driven_discovery] + chain: [] + evidence_gathering: + mission: One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.
+ input: [law_extraction] + chain: [asta literature find, asta papers search, asta documents, asta autodiscovery upload] + replication: + mission: One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data. + replan: true + experiment_design: + mission: State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap. + input: [law_extraction, evidence_gathering] + chain: [asta experiment] + analysis: + mission: Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [experiment_design, evidence_gathering] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. 
+ input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted. + input: [experiment_design, analysis, audit] + chain: [] + reproduction_synthesis: + mission: Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate. + input: [law_extraction, replication] + chain: [] + + theorizer: + mission: Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test. + evidence_extraction: + mission: Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. 
Seek disconfirming evidence too, and tag each finding with the law it bears on. + input: [law_extraction, adjudicate] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + theory_generation: + mission: Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws. + theory_formation: + mission: Form theories from the shared extraction store under this branch's objective. + input: [evidence_extraction] + chain: [asta generate-theories form-theory] + testability_triage: + mission: Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps. + input: [theory_generation, data_driven_discovery, evidence_gathering] + chain: [] + novelty_assessment: + mission: Stock novelty scoring against the shared corpus, run only on the testable subset of theories. + input: [testability_triage] + chain: [asta generate-theories evaluate-novelty] + theory_synthesis: + mission: Fan the theorizer in. Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`. 
+ input: [theory_generation, novelty_assessment, testability_triage] + chain: [] + + hypothesis_driven_research: + mission: Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize. + literature_review: + mission: Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations. + input: [] + chain: [asta literature find, asta papers search] + hypothesis_formation: + mission: Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds. + input: [literature_review] + chain: [asta generate-theories build-extraction-schema, asta generate-theories find-and-extract] + testing: + mission: One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end. + replan: true + experiment_design: + mission: Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. 
What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap. + input: [hypothesis_formation, literature_review] + chain: [asta experiment] + data_acquisition: + mission: Fetch the datasets the design requires. Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input. + input: [experiment_design] + chain: [asta documents, asta autodiscovery upload] + analysis: + mission: Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [experiment_design, data_acquisition] + chain: [asta analyze-data submit, asta analyze-data poll] + audit: + mission: Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears. + input: [analysis] + chain: [asta analyze-data submit, asta analyze-data poll] + adjudicate: + mission: Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id. + input: [experiment_design, analysis, audit] + chain: [] + hypothesis_synthesis: + mission: Fan the branches in. 
Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses. + input: [hypothesis_formation, testing] + chain: [] + + auto_discovery: + mission: Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one. + cohort_assembly: + mission: Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets. + input: [] + chain: [asta literature find, asta documents, asta generate-theories find-and-extract, asta autodiscovery create, asta autodiscovery upload, asta autodiscovery metadata] + discovery_run: + mission: Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). 
Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. No separate law_extraction step - the high-surprise nodes are the laws. + input: [cohort_assembly] + chain: [asta autodiscovery submit, asta autodiscovery experiments] + replication: + mission: One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset. + replan: true + holdout_replication: + mission: Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions. + input: [discovery_run, cohort_assembly] + chain: [asta analyze-data submit, asta analyze-data poll] + discovery_synthesis: + mission: Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. A failed law is a result, not a gap. 
+ input: [discovery_run, replication] + chain: [] diff --git a/plugins/asta/skills/research-step/scripts/close-task.sh b/plugins/asta/skills/research-step/scripts/close-task.sh new file mode 100755 index 0000000..7535a38 --- /dev/null +++ b/plugins/asta/skills/research-step/scripts/close-task.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# close-task.sh <issue-id> <output-json> <output-markdown> +# Publish a task's output and finish it: write output_json + output_markdown into the issue +# metadata, validate output_json against the schema, close the issue, assert it closed, then +# close any ancestor group whose last child just closed. +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -eq 3 ]] || { echo "usage: close-task.sh <issue-id> <output-json> <output-markdown>" >&2; exit 1; } +id="$1"; oj="$2"; om="$3" +[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; } +[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; } +jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; } + +# 1. publish: merge output_json + output_markdown into the existing research_step metadata +cur="$(bd show "$id" --json | jq -c '.[0].metadata')" +merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \ + '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$merged" > "$tmp" +bd update "$id" --metadata @"$tmp" >/dev/null + +# 2. validate structurally (reads the issue back; no style lint) +bash "$here/validate-output.sh" "$id" + +# 3. close and 4. assert closure +bd close "$id" >/dev/null +[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \ + || { echo "close-task: $id did not close" >&2; exit 2; } +echo "closed $id" + +# 5. cascade: close each ancestor group whose direct children are all closed. 
+# The epic root is never closed here — "root open, no open tasks" is the +# session-complete state that epic-root.sh and the workflows rely on. +cur_id="$id" +while [[ "$cur_id" == *.* ]]; do + parent="${cur_id%.*}" + parent_json="$(bd show "$parent" --json 2>/dev/null)" || break + [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break + open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" ' + [ .[] + | select(.id | startswith($p + ".")) + | select((.id[($p|length)+1:] | contains(".")) | not) + | select(.status != "closed") ] | length')" + [[ "$open_kids" -eq 0 ]] || break + if bd close "$parent" >/dev/null 2>&1; then + echo "closed group $parent" + else + echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2 + break + fi + cur_id="$parent" +done diff --git a/plugins/asta/skills/research-step/scripts/create-task.sh b/plugins/asta/skills/research-step/scripts/create-task.sh new file mode 100755 index 0000000..1e992a9 --- /dev/null +++ b/plugins/asta/skills/research-step/scripts/create-task.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# create-task.sh <parent-id> <task_type> <flow> <title> <brief-description> [input-id ...] +# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description, +# and initialized research_step metadata. output_json / output_markdown stay null until +# execute publishes them via close-task.sh. Prints the new issue id. +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; } +parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5 + +# Validate the task_type against schemas.yaml. The helper exits 3 for an +# unknown task_type (and prints the known ones) or 5 when the schema cannot +# be read (e.g. 
PyYAML missing — run init); set -e propagates either. +"$here/task-output-keys.sh" "$task_type" >/dev/null + +[[ -n "$desc" ]] || { echo "create-task: a brief description is required" >&2; exit 4; } +[[ "$desc" != *$'\n'* ]] || { echo "create-task: description must be one line" >&2; exit 4; } +[[ "${#desc}" -le 200 ]] || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; } + +if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi +meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \ + '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$meta" > "$tmp" +bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent diff --git a/plugins/asta/skills/research-step/scripts/epic-root.sh b/plugins/asta/skills/research-step/scripts/epic-root.sh index 13a7dfd..c176ef0 100755 --- a/plugins/asta/skills/research-step/scripts/epic-root.sh +++ b/plugins/asta/skills/research-step/scripts/epic-root.sh @@ -33,7 +33,7 @@ if ! command -v jq >/dev/null 2>&1; then exit 3 fi -ids=$(bd list --json | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id') +ids=$(bd list --json --limit 0 | jq -r '.[] | select(.metadata.research_step.epic_root == true) | .id') count=$(printf '%s' "$ids" | grep -c . || true) case "$count" in diff --git a/plugins/asta/skills/research-step/scripts/next-task.sh b/plugins/asta/skills/research-step/scripts/next-task.sh new file mode 100755 index 0000000..97e3592 --- /dev/null +++ b/plugins/asta/skills/research-step/scripts/next-task.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# next-task.sh — the single definition of task ordering. 
Prints the open task +# issues (status == open, metadata.research_step.task_type set), sorted +# *numerically* by hierarchical id (wf.1.2 before wf.1.10 — a plain lexical +# sort would get this wrong past 9 siblings). Groups (no task_type) are never +# listed; there are no dependency edges, so this order is the ordering signal. +# +# Used by execute (pick the next task) and update-summary (render the queue), +# so the two never disagree about what runs next. +# +# Output (stdout, key: value lines): +# next: <bd-id> | none +# queue: <space-separated bd-ids> (omitted when empty) +# Exit: 0 (even when next: none) · 3 bd/jq missing +set -euo pipefail + +command -v bd >/dev/null 2>&1 || { echo "next-task: 'bd' not found on PATH" >&2; exit 3; } +command -v jq >/dev/null 2>&1 || { echo "next-task: 'jq' not found on PATH" >&2; exit 3; } + +ids="$(bd list --json --limit 0 | jq -r ' + [ .[] + | select(.status == "open") + | select(.metadata.research_step.task_type != null) ] + | sort_by(.id | split(".") | map(tonumber? // .)) + | .[].id')" + +if [[ -z "$ids" ]]; then + echo "next: none" + exit 0 +fi + +echo "next: $(head -n1 <<<"$ids")" +rest="$(tail -n +2 <<<"$ids" | tr '\n' ' ' | sed 's/ $//')" +[[ -n "$rest" ]] && echo "queue: $rest" || true diff --git a/plugins/asta/skills/research-step/scripts/summary-check.sh b/plugins/asta/skills/research-step/scripts/summary-check.sh index 8d98b65..6a14470 100755 --- a/plugins/asta/skills/research-step/scripts/summary-check.sh +++ b/plugins/asta/skills/research-step/scripts/summary-check.sh @@ -30,7 +30,7 @@ if ! 
command -v jq >/dev/null 2>&1; then exit 3 fi -current=$(bd list --json \ +current=$(bd list --json --limit 0 \ | jq -r '.[] | select(.status != "closed") | .id' \ | sort \ | shasum -a 256 \
diff --git a/plugins/asta/skills/research-step/scripts/task-output-keys.sh b/plugins/asta/skills/research-step/scripts/task-output-keys.sh
new file mode 100755
index 0000000..ef1269b
--- /dev/null
+++ b/plugins/asta/skills/research-step/scripts/task-output-keys.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# task-output-keys.sh <task_type> — print the space-separated output keys for a
+# task from assets/schemas.yaml. The single schema reader for scripts:
+# create-task.sh and validate-output.sh run it with stdout discarded, purely to
+# reject an unknown task_type.
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema
+# (python3/PyYAML missing, or schemas.yaml unreadable or malformed — run init)
+set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+schemas="$here/../assets/schemas.yaml"
+
+[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; }
+
+# Without this guard a missing interpreter surfaces as the shell's own 127
+# rather than the documented 5.
+command -v python3 >/dev/null 2>&1 || { echo "task-output-keys: 'python3' not found on PATH - run the init workflow" >&2; exit 5; }
+
+python3 - "$schemas" "$1" <<'PY'
+import sys
+
+try:
+    import yaml
+except ImportError:
+    print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr)
+    sys.exit(5)
+
+schema_path, task_type = sys.argv[1], sys.argv[2]
+
+try:
+    # YAML is UTF-8; do not depend on the locale's default encoding.
+    with open(schema_path, encoding="utf-8") as f:
+        d = yaml.safe_load(f)
+except Exception as e:
+    print(f"task-output-keys: cannot read {schema_path}: {e}", file=sys.stderr)
+    sys.exit(5)
+
+# An empty file loads as None and a malformed one may load as a list or scalar;
+# both mean "cannot read schema" (5), not an AttributeError traceback.
+tasks = (d.get("tasks") or {}) if isinstance(d, dict) else None
+if not isinstance(tasks, dict):
+    print(f"task-output-keys: {schema_path} has no usable 'tasks' mapping", file=sys.stderr)
+    sys.exit(5)
+
+t = tasks.get(task_type)
+if t is None:
+    print(f"task-output-keys: unknown task_type '{task_type}'", file=sys.stderr)
+    print(f"task-output-keys: known: {' '.join(sorted(map(str, tasks)))}", file=sys.stderr)
+    sys.exit(3)
+
+# A known task must declare `output` (key -> type; a plain list of keys is tolerated).
+output = t.get("output") if isinstance(t, dict) else None
+if not isinstance(output, (dict, list)):
+    print(f"task-output-keys: task '{task_type}' has no 'output' mapping in {schema_path}", file=sys.stderr)
+    sys.exit(5)
+print(" ".join(str(k) for k in output))
+PY
diff --git a/plugins/asta/skills/research-step/scripts/validate-output.sh b/plugins/asta/skills/research-step/scripts/validate-output.sh index 0f5a84e..69530f9 100755 --- a/plugins/asta/skills/research-step/scripts/validate-output.sh
+++ b/plugins/asta/skills/research-step/scripts/validate-output.sh
@@ -1,102 +1,74 @@
 #!/usr/bin/env bash
-# validate-output.sh — structural validation of a research_step output JSON.
-#
-# Usage: validate-output.sh <task_type> <metadata-json-file>
-#
-# Verifies that the JSON file:
-# 1. parses
-# 2. carries the canonical metadata envelope
-# ({research_step: {task_type, inputs, output_schema_version, output}})
-# 3. has every required `output.<key>` for the given <task_type> per
-# assets/schemas.yaml (schema_version: 1)
-#
-# Exit codes:
-# 0 — valid
-# 2 — JSON parse error
-# 3 — unknown task_type
-# 4 — missing required field
-# 5 — task_type mismatch with envelope
-#
-# This is structural validation only. Quality validation (sound prediction,
-# sane confidence, valid citations) is out of scope per execute.md.
+# validate-output.sh <issue-id> — structural check of a task's stored output_json.
+# Reads the issue from beads and deep-validates metadata.research_step.output_json
+# against the compiled JSON Schema (assets/compiled/<task_type>.schema.json,
+# regenerated from schemas.yaml by scripts/compile-schemas.py at build time):
+# top-level keys closed, declared nested fields required, extra nested fields
+# permitted (payloads nest verbatim). No style or quality linting.
+# Exit: 0 ok · 1 usage · 2 bad issue/metadata (unknown id included) · 3 unknown task
+# · 4 schema violation
+# · 5 schema unreadable (PyYAML/jsonschema missing, or compiled schema
+# absent or malformed — run the init workflow, or update the plugin)
 set -euo pipefail
+here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-if [[ $# -ne 2 ]]; then
-  echo "usage: validate-output.sh <task_type> <metadata-json-file>" >&2
-  exit 1
-fi
+[[ $# -eq 1 ]] || { echo "usage: validate-output.sh <issue-id>" >&2; exit 1; }
+id="$1"
 
-task_type="$1"
-file="$2"
+# If `bd show` exits non-zero (e.g. an unknown id), pipefail + set -e would abort
+# right here with bd's status and no message. Fold that failure into an
+# empty $rs so the check below reports it as the documented exit 2.
+rs="$(bd show "$id" --json 2>/dev/null | jq -c '.[0].metadata.research_step // empty')" || rs=""
+[[ -n "$rs" ]] || { echo "validate-output: $id not found or has no metadata.research_step" >&2; exit 2; }
+task_type="$(jq -r '.task_type // empty' <<<"$rs")"
+[[ -n "$task_type" ]] || { echo "validate-output: $id has no task_type" >&2; exit 2; }
 
-if ! jq -e . "$file" > /dev/null 2>&1; then
-  echo "validate-output: $file is not valid JSON" >&2
-  exit 2
-fi
+# Exits 3 (unknown task_type) or 5 (schema unreadable) with its own message.
+"$here/task-output-keys.sh" "$task_type" >/dev/null
 
-# Required output fields, mirroring assets/schemas.yaml (schema_version: 1).
-case "$task_type" in
-  scope) required="question boundaries success_criteria" ;;
-  definitions) required="terms" ;;
-  literature_review) required="summary_path key_findings gaps citations" ;;
-  hypothesis) required="statement rationale falsifiable_prediction expected_evidence" ;;
-  experiment_design) required="method procedure variables artifacts_expected" ;;
-  evidence_gathering) required="artifacts log_path deviations" ;;
-  analysis) required="verdict confidence reasoning caveats" ;;
-  synthesis) required="answer supporting_hypotheses refuted_hypotheses open_questions report_path" ;;
-  *)
-    echo "validate-output: unknown task_type '$task_type'" >&2
-    echo "validate-output: expected one of scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis" >&2
-    exit 3
-    ;;
-esac
+got="$(jq -c '.output_json // empty' <<<"$rs")"
+[[ -n "$got" && "$got" != "null" ]] || { echo "validate-output: $id has no output_json" >&2; exit 4; }
 
-# Envelope must carry the matching task_type so we don't validate scope JSON
-# against an analysis schema by accident.
-envelope_type=$(jq -r '.research_step.task_type // empty' "$file")
-if [[ -z "$envelope_type" ]]; then
-  echo "validate-output: $file missing .research_step.task_type" >&2
+schema="$here/../assets/compiled/${task_type}.schema.json"
+[[ -r "$schema" ]] || {
+  echo "validate-output: compiled schema missing for '$task_type' ($schema) — update the plugin (it is regenerated at build time)" >&2
   exit 5
-fi
-if [[ "$envelope_type" != "$task_type" ]]; then
-  echo "validate-output: envelope task_type='$envelope_type' but expected '$task_type'" >&2
-  exit 5
-fi
+}
+OUTPUT_JSON="$got" python3 - "$schema" "$task_type" <<'PY'
+import json
+import os
+import sys
 
-# Envelope shape sanity.
-for key in inputs output_schema_version output; do
-  if ! jq -e ".research_step | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: $file missing .research_step.$key" >&2
-    exit 5
-  fi
-done
+try:
+    import jsonschema
+except ImportError:
+    print("validate-output: python3 cannot import jsonschema - run the init workflow", file=sys.stderr)
+    sys.exit(5)
 
-# Required output fields.
-for key in $required; do
-  if ! jq -e ".research_step.output | has(\"$key\")" "$file" >/dev/null; then
-    echo "validate-output: missing required field 'output.$key' for task_type '$task_type'" >&2
-    exit 4
-  fi
-done
+# A compiled schema that exists but does not parse is "schema unreadable"
+# (5), not an uncaught traceback.
+try:
+    with open(sys.argv[1], encoding="utf-8") as f:
+        schema = json.load(f)
+except (OSError, ValueError) as e:
+    print(f"validate-output: cannot load compiled schema {sys.argv[1]}: {e}", file=sys.stderr)
+    sys.exit(5)
+data = json.loads(os.environ["OUTPUT_JSON"])
 
-# Type spot-checks for the high-leverage cases. Not exhaustive — just the
-# fields where a wrong type at this layer would silently break update-summary rendering
-# or downstream tasks.
-case "$task_type" in
-  literature_review)
-    jq -e '.research_step.output.key_findings | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.key_findings must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.gaps | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.gaps must be an array" >&2; exit 4; }
-    jq -e '.research_step.output.citations | type == "array"' "$file" >/dev/null \
-      || { echo "validate-output: output.citations must be an array" >&2; exit 4; }
-    ;;
-  analysis)
-    jq -e '.research_step.output.verdict | IN("supported", "refuted", "inconclusive")' "$file" >/dev/null \
-      || { echo "validate-output: output.verdict must be one of supported|refuted|inconclusive" >&2; exit 4; }
-    jq -e '.research_step.output.confidence | type == "number" and . >= 0 and . <= 1' "$file" >/dev/null \
-      || { echo "validate-output: output.confidence must be a number in [0, 1]" >&2; exit 4; }
-    ;;
-esac
+validator = jsonschema.Draft202012Validator(schema)
+errors = sorted(validator.iter_errors(data), key=lambda e: list(map(str, e.absolute_path)))
+if errors:
+    for e in errors[:5]:
+        path = ".".join(str(p) for p in e.absolute_path)
+        where = f"output_json.{path}" if path else "output_json"
+        hint = ""
+        if e.validator == "additionalProperties" and not path:
+            hint = " - byproducts go in artifacts"
+        print(f"validate-output: {where}: {e.message}{hint}", file=sys.stderr)
+    if len(errors) > 5:
+        print(f"validate-output: ... and {len(errors) - 5} more schema violation(s)", file=sys.stderr)
+    print(f"validate-output: output_json does not satisfy the '{sys.argv[2]}' schema", file=sys.stderr)
+    sys.exit(4)
+PY
 
 echo "ok"
diff --git a/plugins/asta/skills/research-step/scripts/write-meta.sh b/plugins/asta/skills/research-step/scripts/write-meta.sh deleted file mode 100755 index 6e7d71a..0000000 --- a/plugins/asta/skills/research-step/scripts/write-meta.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# write-meta.sh — materialize a metadata JSON blob to a temp file and print -# its path, suitable for `bd update <id> --metadata @<path>` or -# `bd create ... --metadata=@<path>`. -# -# Reads JSON from stdin (or from $1 if a path is given), validates that it -# parses, and writes it under $TMPDIR with mode 0600. The path is printed on -# stdout so the caller can splice it into a bd command. -# -# Why this exists: `bd update --metadata` accepts either a JSON string or -# `@file.json`. Inlining a JSON string requires `"$(cat /tmp/x.json)"` (a -# non-bd shell op the SKILL.md frontmatter does not permit), and shell quoting -# gets fragile with embedded quotes. Materializing a file once and using -# `@path` keeps everything in `Bash(bd:*)` territory.
-set -euo pipefail - -tmp=$(mktemp -t research-step-meta.XXXXXX.json) -trap 'rm -f "$tmp"' ERR - -if [[ $# -ge 1 ]]; then - cp "$1" "$tmp" -else - cat > "$tmp" -fi - -if ! jq -e . "$tmp" >/dev/null 2>&1; then - echo "write-meta: input is not valid JSON" >&2 - rm -f "$tmp" - exit 2 -fi - -chmod 0600 "$tmp" -echo "$tmp" diff --git a/plugins/asta/skills/research-step/workflows/brainstorm.md b/plugins/asta/skills/research-step/workflows/brainstorm.md index 884f48f..6a9bbf6 100644 --- a/plugins/asta/skills/research-step/workflows/brainstorm.md +++ b/plugins/asta/skills/research-step/workflows/brainstorm.md @@ -25,27 +25,27 @@ If `has_epic`, hand off to **update-summary** before anything else so `summary.m Pick the branch that matches; do not run more than one. - **No `mission.md`** → help the user draft one. - Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**. + Engage in a short Socratic exchange. Useful prompts: the research question, why it matters, what success looks like, what's already known, what's explicitly out of scope. Also settle the **flow(s)**: open `assets/schemas.yaml` and enumerate the keys under `flows:` — do **not** offer flows from memory; the file is the only source of the list, and each flow's purpose is in its `mission` field. A custom chain of `tasks:` entries is also an option. A session may run more than one flow. Record the chosen flow(s) in `mission.md` so `plan` can read them. Also surface the session **config knobs** (the `config:` section of `assets/schemas.yaml`, e.g. `n_experiments`, `max_papers_to_retrieve`) with their defaults; record any non-default choices in a `## Config` section of `mission.md` (one `key: value` line each) — `plan` pins the resolved config on the epic at bootstrap. 
When you have enough, propose a draft, get confirmation, and write `mission.md`. Then offer to run **init**. - **`mission.md` exists, no epic** → recap the mission, check whether the user wants to refine it, then offer to run **init** to bootstrap the research session. -- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / ready counts plus the single most-relevant ready task) and ask what they want to do next. +- **Active session (`has_epic`)** → answer the user's question, or if they didn't ask one, give a short status report (closed / in-progress / open-task counts plus the next task from `scripts/next-task.sh`) and ask what they want to do next. ### 3. Answer questions, preferring `summary.md` -`summary.md` is the synthesized view of the session — mission, scope, definitions, related work, hypotheses, results, open questions, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current. +`summary.md` is the synthesized view of the session — mission, flow(s), results so far (report headlines), gaps, and status. It was just regenerated by the `update-summary` hand-off in step 1, so it is current. -**Default path: read `summary.md`.** For most questions ("what's the current scope?", "which hypotheses are open?", "what's blocking progress?", "what's the state of H2?"), the answer is already in this file. Read it first; quote or summarize the relevant section. +**Default path: read `summary.md`.** For most questions ("which laws held?", "what theories came out?", "what's blocking progress?", "what's next?"), the answer is already in this file. Read it first; quote or summarize the relevant section. 
**Drop down to beads only when the digest doesn't have the answer.** `summary.md` summarizes; some questions need the raw outputs: | Need | Query | |---|--------------------------------------------------------------------------------------------------------| -| Single issue's full `metadata.research_step.output` | `bd show <id> --json` | -| Full open-issue metadata (rare; usually the digest covers it) | `bd list` | -| Dependency structure | `bd dep tree <epic-id> --direction up`| -| Long-form notes from an evidence_gathering task | follow `metadata.research_step.output.summary_path` referenced from the digest | -| Exact `verdict` / `confidence` for a hypothesis | `bd show <analysis-id> --json` (digest reports the verdict, not the confidence number) | +| Single issue's full output (`output_json` + `output_markdown`) | `bd show <id> --json` | +| Full issue metadata (rare; usually the digest covers it) | `bd list --all --limit 0` | +| Task tree | `bd list --json --all --limit 0` — ids encode the parent-child outline | +| Long-form content behind a report | follow `report_path` (or any `_path` field) from the issue's `output_json` | +| Exact verdict / effect size for a law, theory, or hypothesis | `bd show <adjudicate-id> --json` (the adjudication record; the digest reports headlines, not the numbers) | Rule of thumb: if you can answer from `summary.md`, do. If the user asks for a specific number, file path, or verbatim output that the digest abstracts, then fetch it from `bd`. diff --git a/plugins/asta/skills/research-step/workflows/execute.md b/plugins/asta/skills/research-step/workflows/execute.md index 5fba9ea..b4ba1ef 100644 --- a/plugins/asta/skills/research-step/workflows/execute.md +++ b/plugins/asta/skills/research-step/workflows/execute.md @@ -5,36 +5,39 @@ Run one ready task end-to-end. Loads its schema, gathers its declared inputs, pr ## Preconditions - An epic root exists (`scripts/epic-root.sh` prints `status: found`). 
-- `bd ready --json` is non-empty, **or** the caller supplied a specific task ID that is currently `open` and unblocked. +- An open issue with a `task_type` exists, **or** the caller supplied a specific `open` task ID. ## Steps -1. **Pick a task.** If a task ID was supplied, use it. Else `bd ready --json` and pick the oldest issue (tiebreak by `bd-id` ascending). Hypothesis tasks are normally auto-resolved at creation by **plan**, so they should not appear here. If one does, it means the gap text was too thin for plan to fill the output without inventing content — flag this to the user and ask whether to refine the source `literature_review` first. -2. **Claim it.** `bd update <id> --status=in_progress`. -3. **Load the schema.** Read the task type with `bd show <id> --json | jq -r '.[0].metadata.research_step.task_type'`. Open `assets/schemas.yaml` and find the matching entry under `task_types`. -4. **Gather inputs.** For every issue listed in this issue's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `summary_path` from a `literature_review`). **This is the only context to use** — do not pull in unrelated repo state. -5. **Do the work.** Produce a JSON object matching the schema. For schema fields ending in `_path`, write the file to disk first and put the relative path in the JSON. -6. **Validate structurally.** Run `scripts/validate-output.sh <task_type> <metadata-json-file>`. It checks the envelope (`research_step.task_type`, `inputs`, `output_schema_version`, `output`) and every required `output.<key>` for the task_type, plus type spot-checks for the high-leverage cases (e.g., `analysis.verdict` enum, `analysis.confidence` range). Exit 0 ⇒ valid. Any non-zero exit ⇒ fail loudly and **leave the issue `in_progress`** for retry. Do not close. -7. 
**Persist the output.** Materialize the metadata JSON via `scripts/write-meta.sh` (reads JSON from stdin, prints a temp file path), then `bd update <id> --metadata @<path>`. Preserve the existing `task_type`, `inputs`, and `output_schema_version`. -8. **Close.** `bd close <id>`. -9. **Hand off to plan or update-summary.** Some closed task types unlock new graph structure; others don't. Decide based on the closed task's `task_type`: +1. **Pick a task.** If a task ID was supplied, use it. Else run `scripts/next-task.sh` and take the `next:` id — it is the single definition of ordering (open issues with a `task_type`, numerically sorted by hierarchical id; `update-summary` renders the same order). `next: none` ⇒ report that and stop. Grouping issues (epics, no `task_type`) are never executed; `close-task.sh` closes them when their last child closes. Do not use `bd ready` — there are no dependency edges, so id order is the ordering signal. +2. **Check readiness.** For every issue id in this task's `inputs` (`bd show <id> --json | jq '.[0].metadata.research_step.inputs'`), verify it is `closed` with a non-null `output_json`. If any input is not ready, **stop and report it** — the graph was built out of order (a task left `in_progress`, or a replan misordering); do not improvise the missing input. This is the readiness check that dependency edges used to provide. +3. **Claim it.** `bd update <id> --status=in_progress`. +4. **Load the schema and config.** Read the flow and task type with `bd show <id> --json | jq -r '.[0].metadata.research_step | .flow, .task_type'`. In `assets/schemas.yaml`: the task's output shape is `tasks.<task_type>.output` (a mapping of key → type; `[type]` means a JSON array of that type); find the step inside `flows.<flow>` — it may be nested under a fan-out group (e.g. `flows.reproduction.replication.experiment_design`) — and use its `mission`, `input`, and `chain`. 
Read the **session config** pinned on the epic root (`bd show <epic-id> --json | jq '.[0].metadata.research_step.config'`) and pass its values into the chain where they apply — `n_experiments` into the run-metadata JSON for `asta autodiscovery metadata`, `max_papers_to_retrieve` on `asta generate-theories find-and-extract`. Do not re-read defaults from schemas.yaml mid-session; the pin is the truth. (Some sessions were bootstrapped before config pinning was introduced; for those, an absent pin means use the schemas.yaml defaults.) +5. **Gather inputs.** For every issue listed in this issue's `inputs`, read its output with `bd show <input-id> --json | jq '.[0].metadata.research_step.output_json'`. Also load `mission.md` and any files referenced from input outputs via `_path` fields (e.g., `report_path` from `reproduction_synthesis`). **This is the only context to use** — do not pull in unrelated repo state. +6. **Do the work.** Follow the step's `mission` and run its `chain` (the asta commands). Produce two things: + - **`output_json`** — a JSON object holding exactly the schema's output keys for this task (`tasks.<task_type>.output`), and nothing else. Fill every typed field the schema declares (including typed verdicts like `adjudication.outcome` or `audit_report.verdict_survives`); only values with **no typed field** (an execution id, intermediate file paths, raw tool output) go in `artifacts`. Artifact rows are **A2A 1.0 Artifacts** — `{artifactId, name, description, parts, metadata}`, where `parts` is an array of text / file / data parts (see `artifact` and `part` in the schema). Artifacts returned by chain commands are stored as received (with their kind recorded in `metadata.type`); locally produced byproducts (a figure, a script, a data file) are wrapped as file parts in the uri form — repo-root-relative path plus mimeType — never the bytes form (beads' ~64KB cap). 
Records are immutable — emit verdicts and enrichments as their own records referencing the original by id (`adjudication.subject_id`, `source_access.data_source_id`); never re-emit an upstream record with changed values. Keep it slim: beads stores metadata inline and rejects large blobs (~64KB+), so put heavy data (raw agent JSON, datasets, full extractions) under `.asta/<agent>/<slug>/` and reference it by repo-root-relative path. `<agent>` is the asta command group (`literature`, `generate-theories`, `autodiscovery`, `analyze-data`); `<slug>` is `YYYY-MM-DD-<short-query-slug>`. Preserve evidence uuids that tie a finding back to its paper. For schema fields ending in `_path`, write the file first and put the path in the JSON. + - **`output_markdown`** — a concise write-up of the result, one `## <key>` section per output key, following the **Report conventions** below (entity hyperlinks, tables, figures). This is guidance, not a gate — the scripts do not assert style. Keep it a digest; heavy data stays in the artifact files. +7. **Finish with `close-task.sh`.** Write the two files — `output.json` (the `output_json` object) and `output.md` (the `output_markdown`) — then run `scripts/close-task.sh <id> <output.json> <output.md>`. It publishes both into the issue metadata, validates `output_json` structurally against the schema (keys must equal the keys of `tasks.<task_type>.output` — which always include `artifacts` — none null; no style checks), closes the issue, confirms it closed, and closes any ancestor group whose last child just closed (it never closes the epic root — the session-complete state is root open with no open tasks). A non-zero exit **before** the `closed <id>` line means the issue is still `in_progress` — fix and re-run. A warning **after** `closed <id>` means the task closed but a group could not be auto-closed; close that group manually. The `description` is untouched; it stays the brief one-liner set at creation. +8. 
**Hand off.** If the flow has steps after this one, hand off to **plan** (source = this issue) to create them; plan chains to **update-summary**. If this was the flow's final synthesis, hand off to **update-summary** directly. - | Closed task_type | Hand off to | - |---|---| - | `literature_review`, `hypothesis`, `analysis`, `synthesis` | **plan** (with this issue as the source). `plan` then chains to **update-summary**. Note: `hypothesis` only reaches this branch in the rare case it was left open at creation; the normal path is plan→auto-resolve. | - | `scope`, `definitions`, `experiment_design`, `evidence_gathering` | **update-summary** directly. | +## Report conventions - Either path ends with `summary.md` rebuilt. +These apply to every `output_markdown` and to every `*_synthesis` report deliverable. Rigorous but not over the top: a report stays roughly 50–100 lines; the detail behind it lives in artifacts it links to. -## Notes on output files +- **Every named entity is a hyperlink.** Papers → DOI or canonical Semantic Scholar URL; datasets and result files → relative path; runs/experiments → their artifact or metadata file; laws/theories/hypotheses → their ledger row, written with an anchor (`<a id="l1"></a>`) so other reports can deep-link (`reproduction_report.md#l1`). A named thing with no link is a defect. +- **Tables are the spine.** Any ledger, matrix, or catalog (laws × outcomes, theories × verdicts, sources × access) is a table with one row per record, mirroring the typed rows in `output_json`. +- **Figures carry the quantitative claims.** Embed each one (`![caption](path)`) where the claim is made and list it in the `figures` output field. Analysis-type tasks must emit at least one figure; synthesis reports embed the figures their headline rests on (effect-size comparisons, verdict panels, discovery-vs-holdout shrinkage). +- Neutral, third-person register; numbers in the text match the tables they summarize. 
-Schema fields ending in `_path` are relative paths. Conventions: +## Notes on output -- `summary_path` (from `literature_review`) → `background_knowledge.txt` by convention, but any path works. -- `log_path` (from `evidence_gathering`) → typically under `logs/`. -- `report_path` (from `synthesis`) → typically `report.md`. +The structured result is `metadata.research_step.output_json`; the narrative is `metadata.research_step.output_markdown`. The issue **`description`** is the brief one-liner set at creation by `create-task.sh` and is not overwritten. Heavy artifacts live under `.asta/<agent>/<slug>/` where `<slug>` is `YYYY-MM-DD-<short-query-slug>`, referenced by repo-root-relative path (`.asta/<agent>/<slug>/<file>`, repo files like the auto-ds inputs as `inputs/<path>`). `output_json.artifacts` holds A2A Artifacts whose file parts reference those paths by uri; heavy payloads (base64 bytes, raw agent JSON) stay on disk, never inline. -Write the file before setting the output JSON. If the executor crashes between writing the file and closing the issue, the file is harmless orphan data — re-running `execute` on the same issue will overwrite it. +Schema fields ending in `_path` are repo-root-relative paths — write the file before putting the path in `output_json`: + +- `report_path` (from every `*_synthesis` report) → the report's `.md` deliverable. The master `final_synthesis` report is typically `report.md` at the repo root; the per-sub-flow reports go under `.asta/<agent>/<slug>/` or alongside it (e.g. `reproduction_report.md`, `theory_report.md`, `verification_report.md`, `hypothesis_report.md`, `data_gaps_report.md`). + +If the executor crashes between writing a file and closing the issue, the file is harmless orphan data — re-running `execute` overwrites it. 
## Out of scope for this workflow diff --git a/plugins/asta/skills/research-step/workflows/init.md b/plugins/asta/skills/research-step/workflows/init.md index fd11be3..408c60f 100644 --- a/plugins/asta/skills/research-step/workflows/init.md +++ b/plugins/asta/skills/research-step/workflows/init.md @@ -1,6 +1,6 @@ # Workflow: init -Bootstrap the environment for a research session: install `bd` and `jq`, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready. +Bootstrap the environment for a research session: install `bd`, `jq`, PyYAML, and jsonschema, run `bd init`, wire beads to the project's git remote for cross-machine sync, and verify the staleness check works. This is the only workflow that may install or configure tools; `plan`, `update-summary`, and `execute` assume the environment is ready. After environment setup, hand off to **plan** to bootstrap the mission epic and initial frontier. @@ -32,12 +32,16 @@ Server mode (`bd init --server`) is out of scope: it requires running a Dolt sql - If no Dolt refs exist on the remote, surface the situation to the user with three options: (a) `bd import .beads/issues.jsonl` (fast, but discards Dolt history and any state newer than the export), (b) configure a Dolt remote and `bd dolt push` from another machine that has the live DB, then retry, (c) abort. - Pick one path only after explicit user confirmation. Never auto-import. -4. **Verify the staleness check works.** +4. **Ensure `python3` can import `yaml` (PyYAML) and `jsonschema`.** `scripts/task-output-keys.sh` (used by `create-task.sh` and `validate-output.sh`) parses `assets/schemas.yaml` with PyYAML; `validate-output.sh` deep-validates each task's `output_json` against the compiled schemas in `assets/compiled/` with jsonschema, and hard-fails (exit 5) without it. 
+ - Probe with `python3 -c 'import yaml, jsonschema'`. If it succeeds, skip. + - Otherwise install what's missing: `python3 -m pip install --user pyyaml jsonschema` (or the platform equivalent, e.g. `apt-get install python3-yaml python3-jsonschema`). Re-probe; if it still fails, abort and ask the user. + +5. **Verify the staleness check works.** - Run `scripts/summary-check.sh`. It hashes the sorted IDs of currently-open issues and compares against `summary.md`'s frontmatter. Backend-agnostic — beads can use whichever storage it likes. - Requires `jq` on PATH; if missing, install it (`brew install jq`, `apt-get install jq`, etc.) and retry. - At init time `summary.md` does not yet exist, so the script will print `status: missing` and exit 1 — that's fine; **update-summary** will create the file later. `status: no-tools` (exit 3) means abort and ask the user. -5. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**. +6. **Hand off to plan.** Per the router's chaining rule, run the **plan** workflow next. It will detect that no epic exists yet and bootstrap one from `mission.md`. If `mission.md` is missing, **plan** will route the user back to **brainstorm**. ## Cross-machine transfer diff --git a/plugins/asta/skills/research-step/workflows/plan.md b/plugins/asta/skills/research-step/workflows/plan.md index c5ffb2d..444ee90 100644 --- a/plugins/asta/skills/research-step/workflows/plan.md +++ b/plugins/asta/skills/research-step/workflows/plan.md @@ -1,99 +1,94 @@ # Workflow: plan -Create or extend the research graph. The single home for "design the next set of typed tasks." Two modes, selected from state: +Create or extend the research graph. The flow chains live in `assets/schemas.yaml` (`flows`) — plan reads them, it does not hardcode the sequence. 
Two modes: -- **bootstrap** — no epic exists yet. Create the mission epic and the initial frontier (scope, definitions, literature_review) from `mission.md`. -- **replan** — an epic exists. Add downstream tasks based on a recently-closed task's output, or on user direction. +- **bootstrap** — no epic yet: pick a flow and lay its first step(s). +- **replan** — an epic exists: after a step closes, add the next step(s) in its flow chain. -Always chains to **update-summary** afterward so `summary.md` reflects the new graph. +Always chains to **update-summary** afterward. ## Preconditions -- `bd` is installed and `.beads/` is initialized. If not, run **init** first. -- For **bootstrap**: `mission.md` exists and is non-empty, and `scripts/epic-root.sh` reports `status: none` (no epic yet). If `mission.md` is missing, abort and route the user to **brainstorm** to draft one. -- For **replan**: `scripts/epic-root.sh` reports `status: found` (an epic exists). If a specific source task was supplied (typically by `execute` chaining into this workflow), it is closed and has a populated `metadata.research_step.output`. +- `bd` installed and `.beads/` initialized (else run **init**). +- **bootstrap**: `mission.md` exists; no epic yet (`scripts/epic-root.sh` → `none`). +- **replan**: an epic exists; either `execute` supplied the closed source task, or the user named what to extend. -## Issue metadata convention +## Task metadata -Every task issue carries: +Create task leaves with `scripts/create-task.sh <parent> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]`. It sets `metadata.research_step = {flow, task_type, inputs, output_schema_version, output_json: null, output_markdown: null}` and a **brief one-line `description`** (it rejects a missing, multi-line, or over-long description). `execute` later publishes `output_json` (the structured result) and `output_markdown` (the narrative) via `close-task.sh`; the description is not overwritten. 
The epic carries `epic_root: true`; group nodes (loops, fan-outs, branches) are epics created with `bd create --parent <parent> -t epic` (no task_type, no description rules). A session may run several flows — the flow is per task, not per epic. -```json -{ - "research_step": { - "task_type": "<scope|definitions|literature_review|hypothesis|experiment_design|evidence_gathering|analysis|synthesis>", - "inputs": ["bd-xxxx", "bd-yyyy"], - "output_schema_version": 1, - "output": null - } -} -``` +## Indentation is the tree -The mission epic additionally carries `epic_root: true`. +The flow in `assets/schemas.yaml` is an indented outline, and the beads graph you build **is that same outline**: each indentation level in the flow becomes one parent-child level in beads. Build it with `bd create --parent`, walking the flow top-down, so hierarchical ids (`wf`, `wf.1`, `wf.1.1`, …) encode the outline position. There are **no `blocks`/`deps` edges** — ordering is the id order, because you create nodes in the order they run. -## Mode selection +Reading a flow node: -1. Run `scripts/epic-root.sh`. `status: none` → **bootstrap**. -2. `status: found` (epic ID on the `id:` line) → **replan**. If the caller named a specific closed task (typical when `execute` chains here), use it as the source. Else, ask the user which closed task to plan around or which subgraph to extend, then proceed. +- A node with a `chain` is a **step** → a `task` issue tagged with its `task_type`. Its `input:` names the upstream steps in this session whose issues you wire as the task's `inputs` (the same task type takes different inputs in different flows, so inputs live on the step, not the task). +- A node without a `chain` (only child nodes and a `mission`) is a **group** → a non-executable `epic` issue (a flow, a loop, or a fan-out). The keys `mission`, `input`, `chain`, and `replan` (a boolean, valid only on groups, marking branches that are created at replan) are node attributes, never child nodes.
+- A `chain` item of the form `{workflow: <flow>, mission: <text>}` expands that node into the named sub-flow's own tree. +- A **fan-out group** (`replication`, `theory_generation`, `verification`, `testing`) inserts **one branch level per item**: the group node, then one branch epic per item, then the group's steps repeated under each branch. The group `mission` names what to branch on. -## Bootstrap mode +The reproduction flow therefore produces this tree (ids illustrative; bracketed nodes — `[epic]`, `[loop]`, `[fan-out]`, `[branch]` — are epics, unbracketed leaves are tasks): -1. **Verify mission.** Read `mission.md`. If missing or empty, abort and suggest **brainstorm**. -2. **Create the epic.** - ``` - bd create --type=epic --title="<one-line summary of mission.md>" --description="$(cat mission.md)" - bd update <epic-id> --metadata '{"research_step":{"epic_root":true}}' - ``` -3. **Create the initial frontier.** Three `task` issues with the metadata convention above: - - `scope: <one-line>` — `inputs: []` - - `definitions: <one-line>` — `inputs: [<scope-id>]` - - `literature_review: <one-line>` — `inputs: [<scope-id>, <definitions-id>]` -4. **Add edges.** - - `parent-child` from each frontier task to the epic - - `blocks`: scope → definitions; scope → literature_review; definitions → literature_review -5. **Report.** Print the epic ID and the three task IDs. +``` +wf [epic] <mission> + wf.1 [loop] reproduction + wf.1.1 data_driven_discovery + wf.1.2 law_extraction + wf.1.3 evidence_gathering + wf.1.4 [fan-out] replication one branch per law + wf.1.4.1 [branch] <law> + wf.1.4.1.1 experiment_design + wf.1.4.1.2 analysis + wf.1.4.1.3 audit + wf.1.4.1.4 adjudicate + wf.1.4.2 [branch] <law> … + wf.1.5 reproduction_synthesis +``` -## Replan mode +The composed flow nests the same way: `wf.1` data_provenance, `wf.2` reproduction, `wf.3` theorizer, `wf.4` verification (one branch per testable theory), `wf.5` verification_synthesis, `wf.6` gap_synthesis, `wf.7` final_synthesis.
Each sub-flow ends in its own synthesis step that emits a report (provenance_report, reproduction_report, theory_report, verification_report); gap_synthesis aggregates their gaps into data_gaps_report and final_synthesis writes the theory-led research_report. -Read the source task's task_type and output: +## Ordering and closing (no edges) -``` -bd show <source-id> --json | jq '.[0].metadata.research_step.task_type' -bd show <source-id> --json | jq '.[0].metadata.research_step.output' -``` +- **Next task = the `next:` line of `scripts/next-task.sh`** (open issues with a `task_type`, **numerically** sorted by hierarchical id — `wf.1.2` before `wf.1.10`). Groups (no `task_type`) are never executed. `execute` and `update-summary` both use this script, so they never disagree about what runs next. +- Because you create in execution order, sequential steps sort before later ones; parallel branches (`wf.1.4.1`, `wf.1.4.2`, …) are independent so any order is fine; a fan-in step like `reproduction_synthesis` (`wf.1.5`) is created after its branches, so it sorts last. +- A group closes when its last child closes — `scripts/close-task.sh` does this automatically, walking up and closing each ancestor whose children are all closed. It never closes the **epic root**: "root open, no open tasks" is the session-complete state. Never close groups by hand. + +## Static vs data-dependent fan-outs + +- **Static** (`theory_generation` by objective): both branches are known up front → create them together. +- **Data-dependent** (`replication` per law, `verification` per testable theory, `testing` per hypothesis): the branch set is known only after the upstream step closes (`law_extraction`, `testability_triage`, `hypothesis_formation`). Lay only what you can; `execute` closes the upstream step; then replan reads its output and creates the branches under the group. Never pre-create data-dependent branches. For any branch the data cannot support, record why rather than dropping it. 
+ +## Gates (replan) -Apply this table: +- When `experiment_design` closes (a `replication` or `testing` branch): `feasibility` of `feasible`/`proxy_only` → create the branch's remaining steps — in `testing`, also `data_acquisition` when the design names data not yet in hand — i.e. `[data_acquisition,] analysis`, `audit`, `adjudicate`; `data_unavailable`/`construct_mismatch` → create only `adjudicate` (it records `outcome: n/a`, `testability: untestable`) plus a `data_acquisition` task under the branch holding the gap. No analysis is created. +- When `testability_triage` closes: create a `verification` branch only per theory in `testable_theory_ids`; the rest become `next_steps` in the final report. +- When `hypothesis_formation` closes: create one `testing` branch per hypothesis. -| Source task_type | Action | -|---|---| -| `literature_review` | For each gap in `output.gaps`, create a `hypothesis` task with `inputs: [<scope-id>, <source-id>]`. Edges: `parent-child` to epic; `blocks` from the source. **Populate `metadata.research_step.output` at creation time** (see below) and close the issue immediately — the gap text already contains the statement, rationale, and prediction in prose, so there is no separate `execute` pass for hypotheses. | -| `hypothesis` | Create the chain `experiment_design` → `evidence_gathering` → `analysis`, each `blocks` the next. `experiment_design` depends on the hypothesis (via `inputs`); `analysis` depends on both the hypothesis and the new `evidence_gathering`. All three get `parent-child` to the epic. | -| `analysis` | If every `hypothesis` in the epic now has a closed `analysis`, create one `synthesis` task with `inputs` listing all analysis IDs and the scope ID. `parent-child` to epic; `blocks` from each analysis. Otherwise no-op. | -| `synthesis` | If `output.open_questions` is non-empty, **stop and ask the user** before creating new `hypothesis` tasks. 
If approved, create them with a `discovered-from` edge back to the synthesis (in addition to the usual edges). | -| `scope`, `definitions`, `experiment_design`, `evidence_gathering` | No replan. Report no-op and stop. | +## Bootstrap -If invoked without a source task and the user has not specified what to plan, do not invent work — ask, or stop. +1. Read `mission.md`. **Pick a flow** from `flows` that fits it (or compose your own chain of `tasks`); ask the user if it's unclear. +2. **Resolve the session config.** Start from the `config:` defaults in `assets/schemas.yaml`; apply any overrides from a `## Config` section in `mission.md` (one `key: value` line each; unknown keys are an error — surface them). The resolved map is pinned in the next step and never re-resolved mid-session. +3. `bd create -t epic` the root from the mission, tagged with metadata `{"research_step": {"epic_root": true, "flow": "<flow>", "config": {<resolved config>}}}`. Create each loop/group epic with `bd create --parent <its parent>` as you reach it, so the id hierarchy matches the flow's indentation. +4. **Create the frontier — and only the frontier.** Lay the flow's first step(s) with `scripts/create-task.sh <group> <task_type> <flow> "<title>" "<brief-description>" [input-id ...]` (a brief one-line description is required). **No edges.** Do not pre-create downstream steps or data-dependent branches; replan adds them once their inputs close. +5. Report the epic id, the flow, the resolved config, the loop/group ids, and the frontier task ids. -### Auto-resolving hypothesis tasks +## Replan -When creating a `hypothesis` from a literature_review gap: +When a step closes, create the next node(s) under their parent, in flow order: -1. 
Derive the four output fields directly from the gap text and surrounding `literature_review` output (`bd show <source-id> --json | jq '.[0].metadata.research_step.output'`): - - `statement` — `H_n: <one-sentence claim>` - - `rationale` — why this gap implies the claim - - `falsifiable_prediction` — what observation would refute it - - `expected_evidence` — list of concrete evidence types that would support it -2. Validate with `scripts/validate-output.sh hypothesis <metadata-json-file>` before persisting. -3. Persist with `scripts/write-meta.sh` + `bd update <id> --metadata @<path>`, then `bd close <id>`. +- Create each step with `create-task.sh`. Its `inputs` are the upstream issue ids it reads, for `execute`'s input-gathering — not for scheduling; the step's `input:` list in `schemas.yaml` names **which** upstream steps to wire. +- A fan-out group: `bd create --parent <group> -t epic` one branch epic per item, then the branch steps under each via `create-task.sh` — **but a gated group lays only the steps up to its gate**: under a `replication` or `testing` branch create only `experiment_design`; the **Gates** rules above create the rest once that `experiment_design` closes. Ungated branches (`verification`: analysis, audit, adjudicate; `theory_generation`: theory_formation) get all their steps at branch creation. Record why for any branch the data can't support, rather than skipping it. +- Apply the **Gates** rules above — they are the only creator of post-gate steps, so nothing is double-created. +- The closing synthesis of a sub-flow (`provenance_synthesis`, `reproduction_synthesis`, `theory_synthesis`, `verification_synthesis`, `hypothesis_synthesis`, `discovery_synthesis`) is created after its branches, so it sorts last; `gap_synthesis` and `final_synthesis` sort after all sub-flows. These are distinct task types, each with its own report output shape.
-If a gap is too thin to fill these fields without inventing content, **do not auto-resolve** — leave the hypothesis open and surface it to the user. Genuine ambiguity is the one case where a separate `execute` pass is warranted. +Stop at the end of the flow. If the closed step has nothing downstream, report no-op. ## After either mode -Hand off to **update-summary** so `summary.md` reflects the new state. +Hand off to **update-summary**. There are no edges to verify — the parent-child tree is the whole structure. ## Out of scope -- Running tasks or producing outputs. That belongs to **execute**. -- Environment setup (installing `bd`/`jq`, `bd init`). That belongs to **init**. -- Editing `mission.md`. That belongs to **brainstorm**. -- Validating output quality. +- Running tasks or producing outputs (**execute**). +- Environment setup (**init**); editing `mission.md` (**brainstorm**); judging output quality. diff --git a/plugins/asta/skills/research-step/workflows/update-summary.md b/plugins/asta/skills/research-step/workflows/update-summary.md index a79f6ff..a96a9fa 100644 --- a/plugins/asta/skills/research-step/workflows/update-summary.md +++ b/plugins/asta/skills/research-step/workflows/update-summary.md @@ -15,13 +15,11 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is - **`status: no-tools`** — `bd` or `jq` is not on PATH. Abort and tell the user to run `init` (which installs both). 2. **Locate the epic.** `epic_id=$(scripts/epic-root.sh | sed -n 's/^id: //p')`. -3. **Gather state inline.** All you need to fill the template comes from a few `bd` queries: - - `bd list --json` for the full tree (issue_count, status partition). - - `bd ready --json` for the ready list (also drives the Next Steps section). - - `bd blocked --json` for the blocked count. - Project each list to `{id, task_type: .metadata.research_step.task_type, title}` with `jq` and partition by `.status`. +3. 
**Gather state inline.** + - `bd list --json --all --limit 0` for the full tree — `--all` because closed issues carry the results, `--limit 0` because bd truncates at 50 rows by default. Project to `{id, task_type: .metadata.research_step.task_type, title, status}` and partition by `.status`. + - `scripts/next-task.sh` for the **next task and the queue** (open task-type issues, numerically sorted by id — the same order `execute` uses). This replaces `bd ready`; there are no edges, so id order is the ordering signal. 4. **Get the timestamp.** `generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)`. -5. **Overwrite `summary.md`** using this template: +5. **Overwrite `summary.md`** using this template (sections come from the **new taxonomy** — flows, laws, theories, reports — not from any per-flow hardcoding; render what the closed tasks' `output_json` actually contains): ```markdown --- @@ -29,7 +27,7 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is beads_epic: <bd-id> generated_at: <ISO-8601 UTC> issue_count: <n> - ready_count: <n> + open_task_count: <n> --- # <mission title> @@ -37,37 +35,29 @@ Regenerate `summary.md` from beads. Idempotent and safe to run anytime. This is ## Mission <verbatim mission.md, or one-paragraph summary if long> - ## Research Question & Scope - <from scope issue's output, or "pending" if not yet closed> + ## Flow + <one line per flow this session runs (from task metadata `flow`), with where it + stands — e.g. "reproduction — replication branches 2/5 closed, synthesis pending"> - ## Operational Definitions - <from definitions issue's output> + ## Results so far + <one subsection per closed `*_synthesis` task: the report's `headline` plus a link + to its `report_path`. Before any synthesis has closed, instead give one bullet per + closed task: "<bd-id> [<task_type>]: <one-line outcome from output_json>" — e.g. 
+ laws extracted, datasets acquired, theories formed, verdicts finalized.> - ## Related Work - <literature_review.output.key_findings as bullets; link to summary_path> - - ## Hypotheses - <one subsection per hypothesis issue: "H_n: <statement>" plus current verdict from its analysis if closed> - - ## Experimental Designs - <one subsection per experiment_design, grouped under its hypothesis> - - ## Results Summary - <table: hypothesis | verdict | confidence | analysis-id> - - ## Open Questions - <synthesis.output.open_questions if synthesis exists, else aggregated from in-flight notes> + ## Gaps + <the `gaps` rows from closed report outputs (item — missing_data — severity), + or "none recorded"> ## Status - Closed: <n> - In progress: <n> — IDs: <list> - - Ready: <n> — IDs: <list> - - Blocked: <n> + - Open tasks: <n> — next: <`next:` from next-task.sh>; queue: <`queue:` line> ### Next Steps - <from `bd ready --json`: one bullet per ready issue, formatted as + <the queue from next-task.sh in order, one bullet each: "- <bd-id> [<task_type>]: <title> — <one-line summary of the action this task will take>". - If `bd ready` is empty, write "No ready tasks — graph is blocked or complete."> + If next-task.sh prints `next: none`, write "No open tasks — flow complete."> ``` 6. **Report.** Print whether the file was rewritten and the snapshot hash. (The "already fresh" case exited at step 1.) @@ -79,4 +69,4 @@ Any reader (human or agent) checks freshness by running `scripts/summary-check.s ## Out of scope for this workflow - Mutating beads. `update-summary` is read-only against `.beads/`. -- Re-planning. Even if `bd ready` is empty and the graph is incomplete, `update-summary` does not create issues. +- Re-planning. Even if no open tasks remain and the graph is incomplete, `update-summary` does not create issues. 
diff --git a/scripts/compile-schemas.py b/scripts/compile-schemas.py new file mode 100644 index 0000000..d237028 --- /dev/null +++ b/scripts/compile-schemas.py @@ -0,0 +1,807 @@ +#!/usr/bin/env python3 +"""compile-schemas.py — compiler for research-step's assets/schemas.yaml. + +Three stages: + parse — load schemas.yaml and parse the type mini-DSL and the flow trees. + check — resolve every cross-reference: type fields -> types/enums/builtins, + task outputs -> types, flow steps -> tasks, {workflow: X} chain + items -> flows, and every `input:` entry -> an upstream node in the + flow's expanded graph (or in some flow that embeds it). Also diffs + scripts/task-output-keys.sh against the compiler's own answer. + emit — assets/compiled/<task>.schema.json (one self-contained JSON Schema + per task), assets/compiled/<flow>.mmd (one fully-expanded mermaid + diagram per flow), and assets/compiled/flows.json (every flow as a + machine-readable graph — nodes, edges, chain commands, missions — + with the task schemas embedded, for downstream renderers). + +Contract notes encoded here (they mirror the comments in schemas.yaml): + - top-level output_json is closed (additionalProperties: false) — byproducts + go in artifacts; + - nested objects are open (additionalProperties: true) — agent payloads nest + verbatim, extra nested fields are always permitted; + - a `?` suffix on a field name marks it optional (e.g. `mcts_provenance?:`); + unmarked fields are required. + +Exit: 0 ok (warnings allowed) · 1 errors (all printed). +""" + +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +SCHEMA_VERSION = 2 +# Shape of assets/compiled/flows.json. Bump when the JSON structure changes; +# downstream renderers gate on it before drawing anything. 
+FLOWS_FORMAT_VERSION = 1 +BUILTINS = { + "string": {"type": "string"}, + "number": {"type": "number"}, + "boolean": {"type": "boolean"}, + "object": {"type": "object"}, +} +RESERVED_NODE_KEYS = {"mission", "input", "chain", "replan"} +JSON_DIALECT = "https://json-schema.org/draft/2020-12/schema" +GENERATED_NOTE = ( + "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit" +) + + +def field_name(raw): + """Split a DSL field name into (name, optional).""" + if raw.endswith("?"): + return raw[:-1], True + return raw, False + + +class Node: + """One node of a flow tree (step, group, or sub-flow embedding).""" + + def __init__(self, name, kind, parent, origin): + self.name = name + self.kind = kind # step | group | embed + self.parent = parent + self.origin = origin # (flow_name, path-within-that-flow tuple) + self.mission = "" + self.inputs = [] + self.chain_cmds = [] + self.workflow = None # embed only: the flow it expands + self.replan = False + self.children = [] + self.preorder = -1 + + def ancestors(self): + n = self.parent + while n is not None: + yield n + n = n.parent + + def subtree(self): + yield self + for c in self.children: + yield from c.subtree() + + def path(self): + parts = [] + n = self + while n.parent is not None: + parts.append(n.name) + n = n.parent + return tuple(reversed(parts)) + + +class Compiler: + def __init__(self, doc): + self.doc = doc + self.errors = [] + self.warnings = [] + self.enums = {} + self.types = {} + self.tasks = {} + self.flows_raw = {} + + def err(self, ctx, msg): + self.errors.append(f"{ctx}: {msg}") + + def warn(self, ctx, msg): + self.warnings.append(f"{ctx}: {msg}") + + # ---------- parse + check: sections ---------- + + def check_document(self): + d = self.doc + if not isinstance(d, dict): + self.err("document", "schemas.yaml is not a mapping") + return + for key in ("version", "config", "enums", "types", "tasks", "flows"): + if key not in d: + self.err("document", f"missing top-level section 
'{key}'") + if d.get("version") != SCHEMA_VERSION: + self.err("version", f"expected {SCHEMA_VERSION}, got {d.get('version')!r}") + for k in d: + if k not in ("version", "config", "enums", "types", "tasks", "flows"): + self.err("document", f"unknown top-level section '{k}'") + + config = d.get("config") or {} + if not isinstance(config, dict): + self.err("config", "must be a mapping of key: scalar-default") + else: + for k, v in config.items(): + if not isinstance(v, (int, float, str, bool)): + self.err( + f"config.{k}", + f"default must be a scalar, got {type(v).__name__}", + ) + + enums = d.get("enums") or {} + if not isinstance(enums, dict): + self.err("enums", "must be a mapping of name: [values]") + enums = {} + for name, values in enums.items(): + ctx = f"enums.{name}" + if not isinstance(values, list) or not values: + self.err(ctx, "must be a non-empty list") + continue + if any(not isinstance(v, str) for v in values): + self.err(ctx, "values must all be strings") + continue + if len(set(values)) != len(values): + self.err(ctx, "values must be unique") + self.enums[name] = values + + types = d.get("types") or {} + if not isinstance(types, dict): + self.err("types", "must be a mapping of name: definition") + types = {} + self.types = dict(types) + + # name collisions across the three namespaces a reference can hit + for name in self.types: + if name in BUILTINS: + self.err(f"types.{name}", "collides with a builtin scalar name") + if name in self.enums: + self.err(f"types.{name}", "collides with an enum name") + for name in self.enums: + if name in BUILTINS: + self.err(f"enums.{name}", "collides with a builtin scalar name") + + for name, definition in self.types.items(): + self.check_expr(definition, f"types.{name}") + + tasks = d.get("tasks") or {} + if not isinstance(tasks, dict): + self.err("tasks", "must be a mapping of task_type: {output: ...}") + tasks = {} + for name, spec in tasks.items(): + ctx = f"tasks.{name}" + if not isinstance(spec, dict) or 
set(spec) != {"output"}: + self.err(ctx, "must be exactly {output: {...}}") + continue + output = spec["output"] + if not isinstance(output, dict) or not output: + self.err(ctx, "output must be a non-empty mapping") + continue + for key, expr in output.items(): + if key.endswith("?"): + self.err( + f"{ctx}.output", f"top-level key '{key}' may not be optional" + ) + self.check_expr(expr, f"{ctx}.output.{key}") + if output.get("artifacts") != ["artifact"]: + self.err(ctx, "output must include artifacts: [artifact]") + self.tasks[name] = output + + flows = d.get("flows") or {} + if not isinstance(flows, dict): + self.err("flows", "must be a mapping of flow_name: tree") + flows = {} + self.flows_raw = flows + + # ---------- the type expression DSL ---------- + + def check_expr(self, expr, ctx): + if isinstance(expr, str): + name, opt = field_name(expr) + if opt: + self.err( + ctx, + "the '?' optional marker belongs on field names, not type names", + ) + if ( + name not in BUILTINS + and name not in self.enums + and name not in self.types + ): + self.err(ctx, f"unknown type or enum '{name}'") + elif isinstance(expr, list): + if len(expr) != 1: + self.err( + ctx, + f"array type must have exactly one element type, got {len(expr)}", + ) + else: + self.check_expr(expr[0], f"{ctx}[]") + elif isinstance(expr, dict): + if not expr: + self.err(ctx, "inline object must declare at least one field") + for raw, sub in expr.items(): + name, _ = field_name(raw) + self.check_expr(sub, f"{ctx}.{name}") + else: + self.err(ctx, f"unsupported type expression: {expr!r}") + + def type_refs(self, expr): + """Yield the named types an expression references (no emission).""" + if isinstance(expr, str): + if expr in self.types: + yield expr + elif isinstance(expr, list): + for item in expr: + yield from self.type_refs(item) + elif isinstance(expr, dict): + for sub in expr.values(): + yield from self.type_refs(sub) + + def compile_expr(self, expr): + if isinstance(expr, str): + if expr in 
BUILTINS: + return dict(BUILTINS[expr]) + if expr in self.enums: + return {"enum": list(self.enums[expr])} + if expr in self.types: + return {"$ref": f"#/$defs/{expr}"} + return {} # already reported by check_expr + if isinstance(expr, list) and len(expr) == 1: + return {"type": "array", "items": self.compile_expr(expr[0])} + if isinstance(expr, dict): + return self.compile_object(expr) + return {} + + def compile_object(self, fields): + props, required = {}, [] + for raw, sub in fields.items(): + name, optional = field_name(raw) + props[name] = self.compile_expr(sub) + if not optional: + required.append(name) + # nested objects stay open: agent payloads nest verbatim, extra + # nested fields are always permitted (see the note in schemas.yaml) + return { + "type": "object", + "properties": props, + "required": required, + "additionalProperties": True, + } + + def type_closure(self, seed): + seen = set() + frontier = set(seed) + while frontier: + name = frontier.pop() + if name in seen or name not in self.types: + continue + seen.add(name) + frontier |= set(self.type_refs(self.types[name])) - seen + return seen + + def compile_task_schema(self, task): + output = self.tasks[task] + props, required = {}, [] + for key, expr in output.items(): + name, _ = field_name(key) + props[name] = self.compile_expr(expr) + required.append(name) + deps = {t for expr in output.values() for t in self.type_refs(expr)} + defs = { + name: self.compile_expr(self.types[name]) + for name in sorted(self.type_closure(deps)) + } + schema = { + "$schema": JSON_DIALECT, + "$id": f"asta-research-step/{task}.schema.json", + "$comment": GENERATED_NOTE, + "title": task, + "type": "object", + "properties": props, + "required": required, + # top level is closed: byproducts go in artifacts + "additionalProperties": False, + } + if defs: + schema["$defs"] = defs + return schema + + # ---------- flows: parse ---------- + + def parse_flow(self, flow_name): + raw = self.flows_raw[flow_name] + ctx = 
f"flows.{flow_name}" + root = Node(flow_name, "group", None, (flow_name, ())) + if not isinstance(raw, dict): + self.err(ctx, "flow must be a mapping") + return root + root.mission = raw.get("mission", "") + if not isinstance(root.mission, str) or not root.mission: + self.err(ctx, "flow must carry a mission") + for key, value in raw.items(): + if key == "mission": + continue + if key in RESERVED_NODE_KEYS: + self.err(ctx, f"flow root may not carry '{key}'") + continue + child = self.parse_node(key, value, root, flow_name, f"{ctx}.{key}") + if child is not None: + root.children.append(child) + if not root.children: + self.err(ctx, "flow has no steps") + return root + + def parse_node(self, name, raw, parent, flow_name, ctx): + if not isinstance(raw, dict): + self.err(ctx, "node must be a mapping") + return None + origin = (flow_name, parent.origin[1] + (name,)) + mission = raw.get("mission", "") + if not isinstance(mission, str) or not mission: + self.err(ctx, "node must carry a mission") + + child_keys = [k for k in raw if k not in RESERVED_NODE_KEYS] + bad_children = [k for k in child_keys if not isinstance(raw[k], dict)] + for k in bad_children: + self.err(f"{ctx}.{k}", "unknown node attribute (children must be mappings)") + child_keys = [k for k in child_keys if k not in bad_children] + + inputs = raw.get("input", None) + if inputs is not None and ( + not isinstance(inputs, list) or any(not isinstance(i, str) for i in inputs) + ): + self.err(ctx, "input must be a list of node names") + inputs = [] + + replan = raw.get("replan", False) + if not isinstance(replan, bool): + self.err(ctx, "replan must be a boolean") + replan = False + if replan and "chain" in raw: + self.err(ctx, "replan is only valid on groups (branches created at replan)") + + if "chain" not in raw: # group + node = Node(name, "group", parent, origin) + node.mission = mission + node.replan = replan + if inputs is not None: + self.err(ctx, "groups may not carry input — inputs live on steps") + if 
not child_keys: + self.err(ctx, "group has no children (a step needs a chain, even [])") + for k in child_keys: + child = self.parse_node(k, raw[k], node, flow_name, f"{ctx}.{k}") + if child is not None: + node.children.append(child) + return node + + chain = raw["chain"] + if not isinstance(chain, list): + self.err(ctx, "chain must be a list") + chain = [] + workflow_items = [c for c in chain if isinstance(c, dict)] + + if workflow_items: # sub-flow embedding + node = Node(name, "embed", parent, origin) + node.mission = mission + if len(chain) != 1: + self.err( + ctx, "a {workflow: ...} chain item must be the chain's only item" + ) + item = workflow_items[0] + if set(item) != {"workflow", "mission"}: + self.err(ctx, "workflow chain item must be exactly {workflow, mission}") + wf = item.get("workflow") + if wf not in self.flows_raw: + self.err(ctx, f"unknown workflow '{wf}'") + else: + node.workflow = wf + if inputs is not None: + self.err(ctx, "embed nodes may not carry input") + if child_keys: + self.err(ctx, "embed nodes may not have children") + return node + + # step + node = Node(name, "step", parent, origin) + node.mission = mission + node.inputs = list(inputs or []) + if name not in self.tasks: + self.err( + ctx, + f"step name is not a declared task (known: {', '.join(sorted(self.tasks))})", + ) + for c in chain: + if not isinstance(c, str): + self.err(ctx, f"chain item must be a string or {{workflow}}: {c!r}") + elif not c.startswith("asta "): + self.err( + ctx, f"chain command does not look like an asta command: '{c}'" + ) + else: + node.chain_cmds.append(c) + if child_keys: + self.err(ctx, f"steps may not have children: {', '.join(child_keys)}") + return node + + # ---------- flows: expansion + input resolution ---------- + + def expand_flow(self, flow_name, parsed, stack=()): + """Build a fresh expanded tree for flow_name; embeds become groups + wrapping the embedded flow's (recursively expanded) children.""" + if flow_name in stack: + self.err( + 
f"flows.{flow_name}", + f"workflow embedding cycle: {' -> '.join(stack + (flow_name,))}", + ) + return None + + def clone(node, parent): + copy = Node(node.name, node.kind, parent, node.origin) + copy.mission, copy.inputs = node.mission, list(node.inputs) + copy.chain_cmds, copy.replan = list(node.chain_cmds), node.replan + copy.workflow = node.workflow + if node.kind == "embed" and node.workflow: + inner = self.expand_flow(node.workflow, parsed, stack + (flow_name,)) + if inner is not None: + for c in inner.children: + c.parent = copy + copy.children.append(c) + else: + for c in node.children: + copy.children.append(clone(c, copy)) + return copy + + root = clone(parsed[flow_name], None) + for i, n in enumerate(root.subtree()): + n.preorder = i + return root + + @staticmethod + def resolve_input(step, name): + """Nearest-ancestor-scope, upstream-only resolution: walk outward from + the step; in each scope pick the closest preceding node with that name.""" + ancestors = set(step.ancestors()) + for scope in step.ancestors(): + candidates = [ + n + for n in scope.subtree() + if n.name == name and n.preorder < step.preorder and n not in ancestors + ] + if candidates: + return max(candidates, key=lambda n: n.preorder) + return None + + def check_flows(self): + parsed = {name: self.parse_flow(name) for name in self.flows_raw} + if self.errors: + # expansion and resolution on a broken parse just compounds noise + return {} + expanded = {name: self.expand_flow(name, parsed) for name in self.flows_raw} + if any(v is None for v in expanded.values()) or self.errors: + return {} + + # (origin_flow, origin_path, input) -> {context_flow: resolved?} + results = {} + for context, root in expanded.items(): + for node in root.subtree(): + for inp in node.inputs: + key = (node.origin[0], node.origin[1], inp) + results.setdefault(key, {})[context] = ( + self.resolve_input(node, inp) is not None + ) + + for (flow, path, inp), by_context in sorted(results.items()): + if not 
any(by_context.values()): + ctx = f"flows.{flow}.{'.'.join(path)}" + contexts = ", ".join(sorted(by_context)) + self.err( + ctx, + f"input '{inp}' does not resolve to any upstream node " + f"in any context (checked: {contexts})", + ) + return expanded + + # ---------- script compatibility ---------- + + def check_scripts(self, skill_dir): + script = Path(skill_dir) / "scripts" / "task-output-keys.sh" + if not script.is_file(): + self.err("scripts", f"{script} not found") + return + + def query(task): + return subprocess.run( + ["bash", str(script), task], capture_output=True, text=True + ) + + # each call is bash -> python3 -> full YAML parse; run them concurrently + with ThreadPoolExecutor(max_workers=8) as pool: + futures = {task: pool.submit(query, task) for task in sorted(self.tasks)} + bogus = pool.submit(query, "definitely_not_a_task") + + for task, future in futures.items(): + r = future.result() + if r.returncode != 0: + self.err( + f"scripts.task-output-keys.{task}", + f"exited {r.returncode}: {r.stderr.strip()}", + ) + continue + got = r.stdout.split() + want = [field_name(k)[0] for k in self.tasks[task]] + if got != want: + self.err( + f"scripts.task-output-keys.{task}", + f"key drift: script says [{' '.join(got)}], schema says [{' '.join(want)}]", + ) + if bogus.result().returncode != 3: + self.err( + "scripts.task-output-keys", + f"unknown task_type should exit 3, got {bogus.result().returncode}", + ) + + # ---------- mermaid emission ---------- + + @staticmethod + def node_id(node): + return re.sub(r"[^A-Za-z0-9_]", "_", "__".join(node.path())) or "root" + + @staticmethod + def label(text): + return text.replace('"', "#quot;") + + def emit_mermaid(self, flow_name, root): + lines = [ + f"%% {flow_name} — {GENERATED_NOTE}", + "%% solid arrows: input wiring · dashed: inputs satisfied outside this flow", + "flowchart TD", + " classDef replan stroke-dasharray: 6 4", + " classDef embed fill:#eef6ff,stroke:#6699cc", + " classDef external stroke-dasharray: 3 
3,color:#888888", + ] + edges, externals = [], {} + + def walk(node, depth): + pad = " " * depth + nid = self.node_id(node) + if node.kind == "step": + text = node.name + if node.chain_cmds: + text += "<br/>" + " · ".join(node.chain_cmds) + lines.append(f'{pad}{nid}["{self.label(text)}"]') + return + title = node.name + if node.kind == "embed": + title += f" [flow: {node.workflow}]" + if node.replan: + title += " (at replan)" + lines.append(f'{pad}subgraph {nid}["{self.label(title)}"]') + for c in node.children: + walk(c, depth + 1) + lines.append(f"{pad}end") + if node.kind == "embed": + lines.append(f"{pad}class {nid} embed") + if node.replan: + lines.append(f"{pad}class {nid} replan") + + for child in root.children: + walk(child, 1) + + for node in root.subtree(): + for inp in node.inputs: + src = self.resolve_input(node, inp) + if src is not None: + edges.append(f" {self.node_id(src)} --> {self.node_id(node)}") + else: + ext = externals.setdefault( + inp, f"ext__{re.sub(r'[^A-Za-z0-9_]', '_', inp)}" + ) + edges.append(f" {ext} -.-> {self.node_id(node)}") + + for inp, ext in sorted(externals.items()): + lines.append(f' {ext}(["{self.label(inp)} (external)"]):::external') + lines.extend(edges) + return "\n".join(lines) + "\n" + + # ---------- flows.json emission ---------- + + def emit_flow_graph(self, root): + """One flow as a graph: the same expanded tree and input resolution the + mermaid emitter walks, as data. 
Nodes come in preorder so a node's + `parent` always appears earlier (the React Flow sub-flow contract).""" + nodes = [] + for node in root.subtree(): + if node is root: + continue + entry = { + "id": self.node_id(node), + "parent": None if node.parent is root else self.node_id(node.parent), + "kind": node.kind, + "name": node.name, + "mission": node.mission, + "replan": node.replan, + } + if node.kind == "step": + entry["task"] = node.name + entry["chain"] = list(node.chain_cmds) + entry["inputs"] = list(node.inputs) + elif node.kind == "embed": + entry["workflow"] = node.workflow + nodes.append(entry) + + edges, externals = [], {} + for node in root.subtree(): + for inp in node.inputs: + src = self.resolve_input(node, inp) + if src is not None: + source = self.node_id(src) + external = False + else: + source = externals.setdefault( + inp, f"ext__{re.sub(r'[^A-Za-z0-9_]', '_', inp)}" + ) + external = True + edges.append({ + "source": source, + "target": self.node_id(node), + "input": inp, + "external": external, + }) + # inputs satisfied outside this flow: synthetic nodes, like the + # mermaid emitter's dashed "(external)" stadiums + for inp, ext in sorted(externals.items()): + entry = { + "id": ext, + "parent": None, + "kind": "external", + "name": inp, + "mission": "", + "replan": False, + } + if inp in self.tasks: + entry["task"] = inp + nodes.append(entry) + return {"mission": root.mission, "nodes": nodes, "edges": edges} + + def emit_flows_doc(self, expanded, schemas): + """The whole flows.json document. 
Task schemas are embedded once, + keyed by task name (nodes reference them via their `task` field), so + a renderer needs no further fetches; `output` is the raw mini-DSL + expression for compact key/type display.""" + return { + "format_version": FLOWS_FORMAT_VERSION, + "schema_version": SCHEMA_VERSION, + "$comment": GENERATED_NOTE, + "tasks": { + task: {"output": self.tasks[task], "schema": schemas[task]} + for task in sorted(self.tasks) + }, + "flows": { + name: self.emit_flow_graph(root) for name, root in expanded.items() + }, + } + + # ---------- driver ---------- + + def run(self, skill_dir=None, script_check=True): + self.check_document() + expanded = self.check_flows() + + # unreachable-type warning: everything should hang off some task + if self.tasks: + seeds = { + t + for output in self.tasks.values() + for expr in output.values() + for t in self.type_refs(expr) + } + reachable = self.type_closure(seeds) + for name in self.types: + if name not in reachable: + self.warn(f"types.{name}", "not reachable from any task output") + + if script_check and skill_dir is not None and not self.errors: + self.check_scripts(skill_dir) + + schemas = {} + diagrams = {} + flows_doc = {} + if not self.errors: + schemas = {task: self.compile_task_schema(task) for task in self.tasks} + diagrams = { + name: self.emit_mermaid(name, root) for name, root in expanded.items() + } + flows_doc = self.emit_flows_doc(expanded, schemas) + return schemas, diagrams, flows_doc + + +def main(argv=None): + repo = Path(__file__).resolve().parent.parent + ap = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + ap.add_argument( + "--skill-dir", + type=Path, + default=repo / "skills" / "research-step", + help="the research-step skill directory (default: skills/research-step)", + ) + ap.add_argument( + "--no-script-check", + action="store_true", + help="skip the task-output-keys.sh compatibility check", + ) + ap.add_argument( + "--check", + action="store_true", + help="validate only; 
do not write assets/compiled/", + ) + args = ap.parse_args(argv) + + try: + import yaml + except ImportError: + print( + "compile-schemas: python3 cannot import yaml (PyYAML) — pip install pyyaml", + file=sys.stderr, + ) + return 1 + + schemas_path = args.skill_dir / "assets" / "schemas.yaml" + try: + with open(schemas_path) as f: + doc = yaml.safe_load(f) + except Exception as e: + print(f"compile-schemas: cannot read {schemas_path}: {e}", file=sys.stderr) + return 1 + + compiler = Compiler(doc) + schemas, diagrams, flows_doc = compiler.run( + skill_dir=args.skill_dir, script_check=not args.no_script_check + ) + + for w in compiler.warnings: + print(f"compile-schemas: warning: {w}", file=sys.stderr) + if compiler.errors: + for e in compiler.errors: + print(f"compile-schemas: error: {e}", file=sys.stderr) + print(f"compile-schemas: {len(compiler.errors)} error(s)", file=sys.stderr) + return 1 + + if not args.check: + out_dir = args.skill_dir / "assets" / "compiled" + out_dir.mkdir(parents=True, exist_ok=True) + for stale in ( + list(out_dir.glob("*.schema.json")) + + list(out_dir.glob("*.mmd")) + + list(out_dir.glob("flows.json")) + ): + stale.unlink() + for task, schema in schemas.items(): + (out_dir / f"{task}.schema.json").write_text( + json.dumps(schema, indent=2, sort_keys=True) + "\n" + ) + for flow, mmd in diagrams.items(): + (out_dir / f"{flow}.mmd").write_text(mmd) + (out_dir / "flows.json").write_text( + json.dumps(flows_doc, indent=2, sort_keys=True) + "\n" + ) + + print( + f"compile-schemas: ok — {len(compiler.types)} types, " + f"{len(schemas)} task schemas, {len(diagrams)} flow diagrams, flows.json" + + ("" if args.check else f" -> {args.skill_dir / 'assets' / 'compiled'}") + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/skills/research-step/assets/compiled/adjudicate.schema.json b/skills/research-step/assets/compiled/adjudicate.schema.json new file mode 100644 index 0000000..ccfb9d1 --- /dev/null +++ 
b/skills/research-step/assets/compiled/adjudicate.schema.json @@ -0,0 +1,144 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "adjudication": { + "additionalProperties": true, + "properties": { + "data_used": { + "type": "string" + }, + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "prespecified_check": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + }, + "testability": { + "enum": [ + "tested", + "proxy_only", + "untestable" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "outcome", + "testability", + "effect_size_observed", + "prespecified_check", + "independence_axes", + "data_used", + "evidence" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/adjudicate.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + 
"adjudication": { + "$ref": "#/$defs/adjudication" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + } + }, + "required": [ + "adjudication", + "artifacts" + ], + "title": "adjudicate", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/analysis.schema.json b/skills/research-step/assets/compiled/analysis.schema.json new file mode 100644 index 0000000..55e557d --- /dev/null +++ b/skills/research-step/assets/compiled/analysis.schema.json @@ -0,0 +1,119 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "analysis": { + "additionalProperties": true, + "properties": { + "assumptions": { + "type": "string" + }, + "code": { + "type": "string" + }, + "final_answer": { + "type": "string" + } + }, + "required": [ + "final_answer", + "assumptions", + "code" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/analysis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "analysis": { + "$ref": 
"#/$defs/analysis" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + } + }, + "required": [ + "analysis", + "figures", + "artifacts" + ], + "title": "analysis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/audit.schema.json b/skills/research-step/assets/compiled/audit.schema.json new file mode 100644 index 0000000..ca21120 --- /dev/null +++ b/skills/research-step/assets/compiled/audit.schema.json @@ -0,0 +1,127 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "audit_report": { + "additionalProperties": true, + "properties": { + "artifacts_found": { + "items": { + "type": "string" + }, + "type": "array" + }, + "challenges": { + "items": { + "additionalProperties": true, + "properties": { + "check": { + "type": "string" + }, + "concern": { + "type": "string" + }, + "outcome": { + "type": "string" + } + }, + "required": [ + "concern", + "check", + "outcome" + ], + "type": "object" + }, + "type": "array" + }, + "recommended_adjustment": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "verdict_survives": { + "type": "boolean" + } + }, + "required": [ + "subject_id", + "challenges", + "artifacts_found", + "verdict_survives", + "recommended_adjustment" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + 
"type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/audit.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "audit_report": { + "$ref": "#/$defs/audit_report" + } + }, + "required": [ + "audit_report", + "artifacts" + ], + "title": "audit", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/cohort_assembly.schema.json b/skills/research-step/assets/compiled/cohort_assembly.schema.json new file mode 100644 index 0000000..4866540 --- /dev/null +++ b/skills/research-step/assets/compiled/cohort_assembly.schema.json @@ -0,0 +1,206 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "cohort": { + "additionalProperties": true, + "properties": { + "discovery_subset": { + "additionalProperties": true, + "properties": { + "definition": { + "type": "string" + }, + "n": { + "type": "number" + }, + "path": { + "type": "string" + } + }, + "required": [ + "definition", + "n", + "path" + ], + "type": "object" + }, + "exclusion_criteria": { + "type": "string" + }, + "holdout_subset": { + "additionalProperties": true, + "properties": { + "definition": { + "type": "string" + }, + "n": { + "type": "number" + }, + "path": { + "type": "string" + } + }, + "required": 
[ + "definition", + "n", + "path" + ], + "type": "object" + }, + "id": { + "type": "string" + }, + "inclusion_criteria": { + "type": "string" + }, + "research_question": { + "type": "string" + }, + "run_id": { + "type": "string" + }, + "sampling": { + "type": "string" + }, + "source_data_sources": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "research_question", + "inclusion_criteria", + "exclusion_criteria", + "sampling", + "source_data_sources", + "discovery_subset", + "holdout_subset", + "run_id" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/cohort_assembly.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "cohort": { + "$ref": "#/$defs/cohort" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "cohort", + "datasets", + "artifacts" + ], + "title": "cohort_assembly", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/data_acquisition.schema.json b/skills/research-step/assets/compiled/data_acquisition.schema.json new file mode 100644 
index 0000000..0bec23c --- /dev/null +++ b/skills/research-step/assets/compiled/data_acquisition.schema.json @@ -0,0 +1,161 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "acquisition": { + "additionalProperties": true, + "properties": { + "access_status": { + "enum": [ + "acquired", + "open_unfetched", + "restricted", + "not_found" + ] + }, + "data_source_id": { + "type": "string" + }, + "dataset_id": { + "type": "string" + }, + "local_path": { + "type": "string" + }, + "validation_note": { + "type": "string" + } + }, + "required": [ + "data_source_id", + "access_status", + "local_path", + "dataset_id", + "validation_note" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": 
"asta-research-step/data_acquisition.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "acquisitions": { + "items": { + "$ref": "#/$defs/acquisition" + }, + "type": "array" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "acquisitions", + "datasets", + "artifacts" + ], + "title": "data_acquisition", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd new file mode 100644 index 0000000..cb56eed --- /dev/null +++ b/skills/research-step/assets/compiled/data_and_literature_grounded_theory_generation.mmd @@ -0,0 +1,92 @@ +%% data_and_literature_grounded_theory_generation — generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit +%% solid arrows: input wiring · dashed: inputs satisfied outside this flow +flowchart TD + classDef replan stroke-dasharray: 6 4 + classDef embed fill:#eef6ff,stroke:#6699cc + classDef external stroke-dasharray: 3 3,color:#888888 + subgraph data_provenance["data_provenance [flow: data_provenance]"] + data_provenance__provenance_search["provenance_search<br/>asta literature find · asta papers search"] + data_provenance__provenance_extraction["provenance_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"] + data_provenance__data_acquisition["data_acquisition<br/>asta documents · asta autodiscovery upload"] + data_provenance__provenance_synthesis["provenance_synthesis"] + end + class data_provenance embed + subgraph reproduction["reproduction [flow: reproduction]"] + reproduction__data_driven_discovery["data_driven_discovery<br/>asta autodiscovery run · asta autodiscovery experiments"] + 
reproduction__law_extraction["law_extraction"] + reproduction__evidence_gathering["evidence_gathering<br/>asta literature find · asta papers search · asta documents · asta autodiscovery upload"] + subgraph reproduction__replication["replication (at replan)"] + reproduction__replication__experiment_design["experiment_design<br/>asta experiment"] + reproduction__replication__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"] + reproduction__replication__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"] + reproduction__replication__adjudicate["adjudicate"] + end + class reproduction__replication replan + reproduction__reproduction_synthesis["reproduction_synthesis"] + end + class reproduction embed + subgraph theorizer["theorizer [flow: theorizer]"] + theorizer__evidence_extraction["evidence_extraction<br/>asta generate-theories build-extraction-schema · asta generate-theories find-and-extract"] + subgraph theorizer__theory_generation["theory_generation"] + theorizer__theory_generation__theory_formation["theory_formation<br/>asta generate-theories form-theory"] + end + theorizer__testability_triage["testability_triage"] + theorizer__novelty_assessment["novelty_assessment<br/>asta generate-theories evaluate-novelty"] + theorizer__theory_synthesis["theory_synthesis"] + end + class theorizer embed + subgraph verification["verification (at replan)"] + verification__analysis["analysis<br/>asta analyze-data submit · asta analyze-data poll"] + verification__audit["audit<br/>asta analyze-data submit · asta analyze-data poll"] + verification__adjudicate["adjudicate"] + end + class verification replan + verification_synthesis["verification_synthesis"] + gap_synthesis["gap_synthesis"] + final_synthesis["final_synthesis"] + data_provenance__provenance_search --> data_provenance__provenance_extraction + data_provenance__provenance_search --> data_provenance__data_acquisition + data_provenance__provenance_extraction --> 
data_provenance__data_acquisition + data_provenance__provenance_search --> data_provenance__provenance_synthesis + data_provenance__provenance_extraction --> data_provenance__provenance_synthesis + data_provenance__data_acquisition --> data_provenance__provenance_synthesis + reproduction__data_driven_discovery --> reproduction__law_extraction + reproduction__law_extraction --> reproduction__evidence_gathering + reproduction__law_extraction --> reproduction__replication__experiment_design + reproduction__evidence_gathering --> reproduction__replication__experiment_design + reproduction__replication__experiment_design --> reproduction__replication__analysis + reproduction__evidence_gathering --> reproduction__replication__analysis + reproduction__replication__analysis --> reproduction__replication__audit + reproduction__replication__experiment_design --> reproduction__replication__adjudicate + reproduction__replication__analysis --> reproduction__replication__adjudicate + reproduction__replication__audit --> reproduction__replication__adjudicate + reproduction__law_extraction --> reproduction__reproduction_synthesis + reproduction__replication --> reproduction__reproduction_synthesis + reproduction__law_extraction --> theorizer__evidence_extraction + reproduction__replication__adjudicate --> theorizer__evidence_extraction + theorizer__evidence_extraction --> theorizer__theory_generation__theory_formation + theorizer__theory_generation --> theorizer__testability_triage + reproduction__data_driven_discovery --> theorizer__testability_triage + reproduction__evidence_gathering --> theorizer__testability_triage + theorizer__testability_triage --> theorizer__novelty_assessment + theorizer__theory_generation --> theorizer__theory_synthesis + theorizer__novelty_assessment --> theorizer__theory_synthesis + theorizer__testability_triage --> theorizer__theory_synthesis + theorizer__testability_triage --> verification__analysis + reproduction__data_driven_discovery --> 
verification__analysis + reproduction__evidence_gathering --> verification__analysis + verification__analysis --> verification__audit + theorizer__testability_triage --> verification__adjudicate + verification__analysis --> verification__adjudicate + verification__audit --> verification__adjudicate + verification --> verification_synthesis + theorizer__novelty_assessment --> verification_synthesis + data_provenance__provenance_synthesis --> gap_synthesis + reproduction__reproduction_synthesis --> gap_synthesis + theorizer__theory_synthesis --> gap_synthesis + verification_synthesis --> gap_synthesis + data_provenance__provenance_synthesis --> final_synthesis + reproduction__reproduction_synthesis --> final_synthesis + theorizer__theory_synthesis --> final_synthesis + verification_synthesis --> final_synthesis + gap_synthesis --> final_synthesis diff --git a/skills/research-step/assets/compiled/data_driven_discovery.schema.json b/skills/research-step/assets/compiled/data_driven_discovery.schema.json new file mode 100644 index 0000000..14f65a7 --- /dev/null +++ b/skills/research-step/assets/compiled/data_driven_discovery.schema.json @@ -0,0 +1,152 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + 
"type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "experiment": { + "additionalProperties": true, + "properties": { + "analysis": { + "type": "string" + }, + "experiment_id": { + "type": "string" + }, + "hypothesis": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "required": [ + "experiment_id", + "status", + "hypothesis", + "analysis" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/data_driven_discovery.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + }, + "experiments": { + "items": { + "$ref": "#/$defs/experiment" + }, + "type": "array" + } + }, + "required": [ + "experiments", + "datasets", + "artifacts" + ], + "title": "data_driven_discovery", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/discovery_run.schema.json b/skills/research-step/assets/compiled/discovery_run.schema.json new file mode 100644 index 0000000..b7ac259 --- /dev/null +++ b/skills/research-step/assets/compiled/discovery_run.schema.json @@ -0,0 +1,170 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + 
"items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "empirical_law": { + "additionalProperties": true, + "properties": { + "construct": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + "grouping_rationale": { + "type": "string" + }, + "id": { + "type": "string" + }, + "mcts_provenance": { + "additionalProperties": true, + "properties": { + "is_surprising": { + "type": "boolean" + }, + "posterior_belief": { + "type": "object" + }, + "prior_belief": { + "type": "object" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "surprise", + "is_surprising", + "prior_belief", + "posterior_belief" + ], + "type": "object" + }, + "source_node": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "construct", + "source_operationalization", + "source_node", + "effect_size_source", + "grouping_rationale" + ], + "type": "object" + }, + "experiment": { + "additionalProperties": true, + "properties": { + "analysis": { + "type": "string" + }, + "experiment_id": { + "type": "string" + }, + "hypothesis": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "required": [ + "experiment_id", + "status", + "hypothesis", + "analysis" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/discovery_run.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + 
"$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "empirical_laws": { + "items": { + "$ref": "#/$defs/empirical_law" + }, + "type": "array" + }, + "experiments": { + "items": { + "$ref": "#/$defs/experiment" + }, + "type": "array" + } + }, + "required": [ + "experiments", + "empirical_laws", + "artifacts" + ], + "title": "discovery_run", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/discovery_synthesis.schema.json b/skills/research-step/assets/compiled/discovery_synthesis.schema.json new file mode 100644 index 0000000..29cb31f --- /dev/null +++ b/skills/research-step/assets/compiled/discovery_synthesis.schema.json @@ -0,0 +1,271 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "discovery_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "interpretation": { + "type": "string" + }, + "laws": { + "items": { + "additionalProperties": true, + "properties": { + "deciding_experiment": { 
+ "type": "string" + }, + "effect_size_discovery": { + "type": "string" + }, + "effect_size_holdout": { + "type": "string" + }, + "law_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "law_id", + "statement", + "surprise", + "outcome", + "deciding_experiment", + "effect_size_discovery", + "effect_size_holdout" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "next_steps": { + "items": { + "$ref": "#/$defs/next_run_proposal" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "run_id": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "run_id", + "laws", + "interpretation", + "next_steps", + "figures", + "gaps", + "links" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "next_run_proposal": { + "additionalProperties": true, + "properties": { + "data_needed": { + "type": "string" + }, + "expected_signature": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "priority": { + "enum": [ + "high", + "medium", + "low" + ] + }, + "tests": { + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "kind", + "title", + "tests", + "data_needed", + "expected_signature", + "priority" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": 
"string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/discovery_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "discovery_report": { + "$ref": "#/$defs/discovery_report" + } + }, + "required": [ + "discovery_report", + "artifacts" + ], + "title": "discovery_synthesis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/evidence_extraction.schema.json b/skills/research-step/assets/compiled/evidence_extraction.schema.json new file mode 100644 index 0000000..7a53a5b --- /dev/null +++ b/skills/research-step/assets/compiled/evidence_extraction.schema.json @@ -0,0 +1,132 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "extracted_data": { + "additionalProperties": true, + "properties": { + "extraction_schema_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "rows": { + "items": { + "additionalProperties": true, + "properties": { + "brief_description": { + "type": "string" + }, + "citation_title": { + "type": "string" + }, + "name_full": { + "type": "string" + }, + "name_short": { + "type": "string" + }, + "uuid": { + "type": "string" + } + }, + "required": [ + "name_short", + "name_full", 
+ "brief_description", + "citation_title", + "uuid" + ], + "type": "object" + }, + "type": "array" + }, + "run_id": { + "type": "string" + } + }, + "required": [ + "id", + "run_id", + "paper_id", + "extraction_schema_id", + "rows" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/evidence_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "extracted_data": { + "$ref": "#/$defs/extracted_data" + } + }, + "required": [ + "extracted_data", + "artifacts" + ], + "title": "evidence_extraction", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/evidence_gathering.schema.json b/skills/research-step/assets/compiled/evidence_gathering.schema.json new file mode 100644 index 0000000..c310796 --- /dev/null +++ b/skills/research-step/assets/compiled/evidence_gathering.schema.json @@ -0,0 +1,121 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + 
"type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/evidence_gathering.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "datasets", + "artifacts" + ], + "title": "evidence_gathering", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/experiment_design.schema.json b/skills/research-step/assets/compiled/experiment_design.schema.json new file mode 100644 index 0000000..458fe42 --- /dev/null +++ b/skills/research-step/assets/compiled/experiment_design.schema.json @@ -0,0 +1,162 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "experiment_design": { + "additionalProperties": true, + "properties": { + "construct_equivalence": { 
+ "enum": [ + "equivalent", + "proxy", + "mismatch" + ] + }, + "data_gap": { + "type": "string" + }, + "experiment_design_query": { + "type": "string" + }, + "experiment_name": { + "type": "string" + }, + "feasibility": { + "enum": [ + "feasible", + "proxy_only", + "data_unavailable", + "construct_mismatch" + ] + }, + "independent_operationalization": { + "type": "string" + }, + "plain_language_description": { + "type": "string" + }, + "prespecified": { + "additionalProperties": true, + "properties": { + "metric": { + "type": "string" + }, + "success_threshold": { + "type": "string" + }, + "test": { + "type": "string" + } + }, + "required": [ + "test", + "metric", + "success_threshold" + ], + "type": "object" + }, + "required_data": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "experiment_name", + "plain_language_description", + "source_operationalization", + "independent_operationalization", + "construct_equivalence", + "feasibility", + "required_data", + "data_gap", + "experiment_design_query", + "prespecified" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/experiment_design.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "experiment_design": { + "$ref": "#/$defs/experiment_design" + } + }, + "required": [ + "experiment_design", + "artifacts" + ], + "title": "experiment_design", + "type": "object" +} diff --git 
a/skills/research-step/assets/compiled/final_synthesis.schema.json b/skills/research-step/assets/compiled/final_synthesis.schema.json new file mode 100644 index 0000000..b00f085 --- /dev/null +++ b/skills/research-step/assets/compiled/final_synthesis.schema.json @@ -0,0 +1,289 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "research_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "inference_chain": { + "items": { + "additionalProperties": true, + "properties": { + "chain": { + "items": { + "type": "string" + }, + "type": "array" + }, + "claim": { + "type": "string" + } + }, + "required": [ + "claim", + "chain" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": 
"array" + }, + "mechanism": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + }, + "grounded_in": { + "items": { + "type": "string" + }, + "type": "array" + }, + "statement": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "statement", + "grounded_in", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "report_path": { + "type": "string" + }, + "sub_reports": { + "items": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "one_line": { + "type": "string" + }, + "report_path": { + "type": "string" + } + }, + "required": [ + "kind", + "report_path", + "one_line" + ], + "type": "object" + }, + "type": "array" + }, + "tensions_and_surprises": { + "items": { + "additionalProperties": true, + "properties": { + "evidence": { + "type": "string" + }, + "observation": { + "type": "string" + }, + "where": { + "type": "string" + } + }, + "required": [ + "observation", + "where", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "theory_highlights": { + "items": { + "additionalProperties": true, + "properties": { + "claim": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "claim", + "novelty", + "outcome" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + }, + "what_was_done": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "report_path", + "title", + "headline", + "mechanism", + "theory_highlights", + "inference_chain", + "what_was_done", + "sub_reports", + "tensions_and_surprises", + "figures", + "links" + ], + 
"type": "object" + } + }, + "$id": "asta-research-step/final_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "research_report": { + "$ref": "#/$defs/research_report" + } + }, + "required": [ + "research_report", + "artifacts" + ], + "title": "final_synthesis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/flows.json b/skills/research-step/assets/compiled/flows.json new file mode 100644 index 0000000..907a432 --- /dev/null +++ b/skills/research-step/assets/compiled/flows.json @@ -0,0 +1,6657 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "flows": { + "auto_discovery": { + "edges": [ + { + "external": false, + "input": "cohort_assembly", + "source": "cohort_assembly", + "target": "discovery_run" + }, + { + "external": false, + "input": "discovery_run", + "source": "discovery_run", + "target": "replication__holdout_replication" + }, + { + "external": false, + "input": "cohort_assembly", + "source": "cohort_assembly", + "target": "replication__holdout_replication" + }, + { + "external": false, + "input": "discovery_run", + "source": "discovery_run", + "target": "discovery_synthesis" + }, + { + "external": false, + "input": "replication", + "source": "replication", + "target": "discovery_synthesis" + } + ], + "mission": "Source a cohort from the literature and run a fresh auto-ds discovery against a research question, then replicate each high-surprise candidate law on a held-out subset and report which held. Run as its own session in a separate workspace (own mission.md and .beads - a second epic root in one workspace breaks epic-root.sh); the research question (the intent) comes from that mission.md. 
The intent and the cohort are the most important inputs to a good discovery run, so most of the work is front-loaded into cohort_assembly. This flow is distinct from `reproduction`, which imports an existing run rather than standing up a new one.", + "nodes": [ + { + "chain": [ + "asta literature find", + "asta documents", + "asta generate-theories find-and-extract", + "asta autodiscovery create", + "asta autodiscovery upload", + "asta autodiscovery metadata" + ], + "id": "cohort_assembly", + "inputs": [], + "kind": "step", + "mission": "Gather and cohort the data for discovery. Find the relevant papers, extract the numbers and the datasets they used, then source a cohort - fix inclusion/exclusion and sampling, and hold an independent subset back for replication. Validate the assembled data against its source papers (n, schema/variables, units, missingness); a dataset that fails validation is a gap, not an input. Stand up and upload the discovery run (autodiscovery create, upload, metadata). Emit the cohort - its discovery_subset, its held-out holdout_subset, and the stood-up run_id - alongside the registered datasets.", + "name": "cohort_assembly", + "parent": null, + "replan": false, + "task": "cohort_assembly" + }, + { + "chain": [ + "asta autodiscovery submit", + "asta autodiscovery experiments" + ], + "id": "discovery_run", + "inputs": [ + "cohort_assembly" + ], + "kind": "step", + "mission": "Run discovery against the original question with the cohort as data (config n_experiments, set in the run metadata). Fetch the experiments; the highest-surprise nodes are the candidate laws worth replicating. Emit those candidate laws (empirical_law identity records, grounded in the run's surprise signal) alongside the raw experiments. 
No separate law_extraction step - the high-surprise nodes are the laws.", + "name": "discovery_run", + "parent": null, + "replan": false, + "task": "discovery_run" + }, + { + "id": "replication", + "kind": "group", + "mission": "One branch per high-surprise candidate law (created at replan, once discovery_run has named them). Replicate that law independently on the held-out subset.", + "name": "replication", + "parent": null, + "replan": true + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "replication__holdout_replication", + "inputs": [ + "discovery_run", + "cohort_assembly" + ], + "kind": "step", + "mission": "Replicate the law on the held-out subset - one DataVoyager run per law, in parallel (at most config max_parallel_dv_runs concurrent submissions). The verdict comes from this replication, not from the discovery run - emit an adjudication referencing the law id (outcome held/partial/failed/underpowered, or n/a when it could not be tested). Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.", + "name": "holdout_replication", + "parent": "replication", + "replan": false, + "task": "holdout_replication" + }, + { + "chain": [], + "id": "discovery_synthesis", + "inputs": [ + "discovery_run", + "replication" + ], + "kind": "step", + "mission": "Fan the branches in. Write discovery_report - open with the run header (run_id, n_experiments, discovery and holdout cohort sizes), give each law its held-out outcome with the experiment that decided it and both effect sizes (discovery vs held-out, joined from the laws and their adjudications - the pair shows replication shrinkage), write the interpretation (what the run means against the question that motivated it), include a discovery-vs-holdout effect figure, then propose next_steps. 
A failed law is a result, not a gap.", + "name": "discovery_synthesis", + "parent": null, + "replan": false, + "task": "discovery_synthesis" + } + ] + }, + "data_and_literature_grounded_theory_generation": { + "edges": [ + { + "external": false, + "input": "provenance_search", + "source": "data_provenance__provenance_search", + "target": "data_provenance__provenance_extraction" + }, + { + "external": false, + "input": "provenance_search", + "source": "data_provenance__provenance_search", + "target": "data_provenance__data_acquisition" + }, + { + "external": false, + "input": "provenance_extraction", + "source": "data_provenance__provenance_extraction", + "target": "data_provenance__data_acquisition" + }, + { + "external": false, + "input": "provenance_search", + "source": "data_provenance__provenance_search", + "target": "data_provenance__provenance_synthesis" + }, + { + "external": false, + "input": "provenance_extraction", + "source": "data_provenance__provenance_extraction", + "target": "data_provenance__provenance_synthesis" + }, + { + "external": false, + "input": "data_acquisition", + "source": "data_provenance__data_acquisition", + "target": "data_provenance__provenance_synthesis" + }, + { + "external": false, + "input": "data_driven_discovery", + "source": "reproduction__data_driven_discovery", + "target": "reproduction__law_extraction" + }, + { + "external": false, + "input": "law_extraction", + "source": "reproduction__law_extraction", + "target": "reproduction__evidence_gathering" + }, + { + "external": false, + "input": "law_extraction", + "source": "reproduction__law_extraction", + "target": "reproduction__replication__experiment_design" + }, + { + "external": false, + "input": "evidence_gathering", + "source": "reproduction__evidence_gathering", + "target": "reproduction__replication__experiment_design" + }, + { + "external": false, + "input": "experiment_design", + "source": "reproduction__replication__experiment_design", + "target": 
"reproduction__replication__analysis" + }, + { + "external": false, + "input": "evidence_gathering", + "source": "reproduction__evidence_gathering", + "target": "reproduction__replication__analysis" + }, + { + "external": false, + "input": "analysis", + "source": "reproduction__replication__analysis", + "target": "reproduction__replication__audit" + }, + { + "external": false, + "input": "experiment_design", + "source": "reproduction__replication__experiment_design", + "target": "reproduction__replication__adjudicate" + }, + { + "external": false, + "input": "analysis", + "source": "reproduction__replication__analysis", + "target": "reproduction__replication__adjudicate" + }, + { + "external": false, + "input": "audit", + "source": "reproduction__replication__audit", + "target": "reproduction__replication__adjudicate" + }, + { + "external": false, + "input": "law_extraction", + "source": "reproduction__law_extraction", + "target": "reproduction__reproduction_synthesis" + }, + { + "external": false, + "input": "replication", + "source": "reproduction__replication", + "target": "reproduction__reproduction_synthesis" + }, + { + "external": false, + "input": "law_extraction", + "source": "reproduction__law_extraction", + "target": "theorizer__evidence_extraction" + }, + { + "external": false, + "input": "adjudicate", + "source": "reproduction__replication__adjudicate", + "target": "theorizer__evidence_extraction" + }, + { + "external": false, + "input": "evidence_extraction", + "source": "theorizer__evidence_extraction", + "target": "theorizer__theory_generation__theory_formation" + }, + { + "external": false, + "input": "theory_generation", + "source": "theorizer__theory_generation", + "target": "theorizer__testability_triage" + }, + { + "external": false, + "input": "data_driven_discovery", + "source": "reproduction__data_driven_discovery", + "target": "theorizer__testability_triage" + }, + { + "external": false, + "input": "evidence_gathering", + "source": 
"reproduction__evidence_gathering", + "target": "theorizer__testability_triage" + }, + { + "external": false, + "input": "testability_triage", + "source": "theorizer__testability_triage", + "target": "theorizer__novelty_assessment" + }, + { + "external": false, + "input": "theory_generation", + "source": "theorizer__theory_generation", + "target": "theorizer__theory_synthesis" + }, + { + "external": false, + "input": "novelty_assessment", + "source": "theorizer__novelty_assessment", + "target": "theorizer__theory_synthesis" + }, + { + "external": false, + "input": "testability_triage", + "source": "theorizer__testability_triage", + "target": "theorizer__theory_synthesis" + }, + { + "external": false, + "input": "testability_triage", + "source": "theorizer__testability_triage", + "target": "verification__analysis" + }, + { + "external": false, + "input": "data_driven_discovery", + "source": "reproduction__data_driven_discovery", + "target": "verification__analysis" + }, + { + "external": false, + "input": "evidence_gathering", + "source": "reproduction__evidence_gathering", + "target": "verification__analysis" + }, + { + "external": false, + "input": "analysis", + "source": "verification__analysis", + "target": "verification__audit" + }, + { + "external": false, + "input": "testability_triage", + "source": "theorizer__testability_triage", + "target": "verification__adjudicate" + }, + { + "external": false, + "input": "analysis", + "source": "verification__analysis", + "target": "verification__adjudicate" + }, + { + "external": false, + "input": "audit", + "source": "verification__audit", + "target": "verification__adjudicate" + }, + { + "external": false, + "input": "verification", + "source": "verification", + "target": "verification_synthesis" + }, + { + "external": false, + "input": "novelty_assessment", + "source": "theorizer__novelty_assessment", + "target": "verification_synthesis" + }, + { + "external": false, + "input": "provenance_synthesis", + "source": 
"data_provenance__provenance_synthesis", + "target": "gap_synthesis" + }, + { + "external": false, + "input": "reproduction_synthesis", + "source": "reproduction__reproduction_synthesis", + "target": "gap_synthesis" + }, + { + "external": false, + "input": "theory_synthesis", + "source": "theorizer__theory_synthesis", + "target": "gap_synthesis" + }, + { + "external": false, + "input": "verification_synthesis", + "source": "verification_synthesis", + "target": "gap_synthesis" + }, + { + "external": false, + "input": "provenance_synthesis", + "source": "data_provenance__provenance_synthesis", + "target": "final_synthesis" + }, + { + "external": false, + "input": "reproduction_synthesis", + "source": "reproduction__reproduction_synthesis", + "target": "final_synthesis" + }, + { + "external": false, + "input": "theory_synthesis", + "source": "theorizer__theory_synthesis", + "target": "final_synthesis" + }, + { + "external": false, + "input": "verification_synthesis", + "source": "verification_synthesis", + "target": "final_synthesis" + }, + { + "external": false, + "input": "gap_synthesis", + "source": "gap_synthesis", + "target": "final_synthesis" + } + ], + "mission": "Source the papers and data behind an existing auto-ds run, reproduce its laws on independent data, theorize their cross-cutting mechanism, verify the testable theories on the data already in hand, then write the deliverable report.", + "nodes": [ + { + "id": "data_provenance", + "kind": "embed", + "mission": "Before reproducing, source the papers and datasets the run was built on so the underlying data becomes the data in hand.", + "name": "data_provenance", + "parent": null, + "replan": false, + "workflow": "data_provenance" + }, + { + "chain": [ + "asta literature find", + "asta papers search" + ], + "id": "data_provenance__provenance_search", + "inputs": [], + "kind": "step", + "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) 
that published or describe each dataset. Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).", + "name": "provenance_search", + "parent": "data_provenance", + "replan": false, + "task": "provenance_search" + }, + { + "chain": [ + "asta generate-theories build-extraction-schema", + "asta generate-theories find-and-extract" + ], + "id": "data_provenance__provenance_extraction", + "inputs": [ + "provenance_search" + ], + "kind": "step", + "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.", + "name": "provenance_extraction", + "parent": "data_provenance", + "replan": false, + "task": "provenance_extraction" + }, + { + "chain": [ + "asta documents", + "asta autodiscovery upload" + ], + "id": "data_provenance__data_acquisition", + "inputs": [ + "provenance_search", + "provenance_extraction" + ], + "kind": "step", + "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. 
For restricted or not-found data, record a gap rather than blocking downstream work.", + "name": "data_acquisition", + "parent": "data_provenance", + "replan": false, + "task": "data_acquisition" + }, + { + "chain": [], + "id": "data_provenance__provenance_synthesis", + "inputs": [ + "provenance_search", + "provenance_extraction", + "data_acquisition" + ], + "kind": "step", + "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.", + "name": "provenance_synthesis", + "parent": "data_provenance", + "replan": false, + "task": "provenance_synthesis" + }, + { + "id": "reproduction", + "kind": "embed", + "mission": "Import the provided auto-ds run (do not run a fresh one) and reproduce each law on independent data.", + "name": "reproduction", + "parent": null, + "replan": false, + "workflow": "reproduction" + }, + { + "chain": [ + "asta autodiscovery run", + "asta autodiscovery experiments" + ], + "id": "reproduction__data_driven_discovery", + "inputs": [], + "kind": "step", + "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. 
When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.", + "name": "data_driven_discovery", + "parent": "reproduction", + "replan": false, + "task": "data_driven_discovery" + }, + { + "chain": [], + "id": "reproduction__law_extraction", + "inputs": [ + "data_driven_discovery" + ], + "kind": "step", + "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. Laws are identity records - their verdicts come later, from each branch's adjudication.", + "name": "law_extraction", + "parent": "reproduction", + "replan": false, + "task": "law_extraction" + }, + { + "chain": [ + "asta literature find", + "asta papers search", + "asta documents", + "asta autodiscovery upload" + ], + "id": "reproduction__evidence_gathering", + "inputs": [ + "law_extraction" + ], + "kind": "step", + "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.", + "name": "evidence_gathering", + "parent": "reproduction", + "replan": false, + "task": "evidence_gathering" + }, + { + "id": "reproduction__replication", + "kind": "group", + "mission": "One branch per law (created at replan, once law_extraction has produced the law set). 
Reproduce that law on the independent data.", + "name": "replication", + "parent": "reproduction", + "replan": true + }, + { + "chain": [ + "asta experiment" + ], + "id": "reproduction__replication__experiment_design", + "inputs": [ + "law_extraction", + "evidence_gathering" + ], + "kind": "step", + "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.", + "name": "experiment_design", + "parent": "reproduction__replication", + "replan": false, + "task": "experiment_design" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "reproduction__replication__analysis", + "inputs": [ + "experiment_design", + "evidence_gathering" + ], + "kind": "step", + "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. 
Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.", + "name": "analysis", + "parent": "reproduction__replication", + "replan": false, + "task": "analysis" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "reproduction__replication__audit", + "inputs": [ + "analysis" + ], + "kind": "step", + "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.", + "name": "audit", + "parent": "reproduction__replication", + "replan": false, + "task": "audit" + }, + { + "chain": [], + "id": "reproduction__replication__adjudicate", + "inputs": [ + "experiment_design", + "analysis", + "audit" + ], + "kind": "step", + "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. Emit an adjudication referencing the law id - the law record itself is never re-emitted.", + "name": "adjudicate", + "parent": "reproduction__replication", + "replan": false, + "task": "adjudicate" + }, + { + "chain": [], + "id": "reproduction__reproduction_synthesis", + "inputs": [ + "law_extraction", + "replication" + ], + "kind": "step", + "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). 
Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.", + "name": "reproduction_synthesis", + "parent": "reproduction", + "replan": false, + "task": "reproduction_synthesis" + }, + { + "id": "theorizer", + "kind": "embed", + "mission": "Generate literature- and data-grounded theories of the reproduced laws and score their novelty.", + "name": "theorizer", + "parent": null, + "replan": false, + "workflow": "theorizer" + }, + { + "chain": [ + "asta generate-theories build-extraction-schema", + "asta generate-theories find-and-extract" + ], + "id": "theorizer__evidence_extraction", + "inputs": [ + "law_extraction", + "adjudicate" + ], + "kind": "step", + "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.", + "name": "evidence_extraction", + "parent": "theorizer", + "replan": false, + "task": "evidence_extraction" + }, + { + "id": "theorizer__theory_generation", + "kind": "group", + "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. 
Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.", + "name": "theory_generation", + "parent": "theorizer", + "replan": false + }, + { + "chain": [ + "asta generate-theories form-theory" + ], + "id": "theorizer__theory_generation__theory_formation", + "inputs": [ + "evidence_extraction" + ], + "kind": "step", + "mission": "Form theories from the shared extraction store under this branch's objective.", + "name": "theory_formation", + "parent": "theorizer__theory_generation", + "replan": false, + "task": "theory_formation" + }, + { + "chain": [], + "id": "theorizer__testability_triage", + "inputs": [ + "theory_generation", + "data_driven_discovery", + "evidence_gathering" + ], + "kind": "step", + "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.", + "name": "testability_triage", + "parent": "theorizer", + "replan": false, + "task": "testability_triage" + }, + { + "chain": [ + "asta generate-theories evaluate-novelty" + ], + "id": "theorizer__novelty_assessment", + "inputs": [ + "testability_triage" + ], + "kind": "step", + "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.", + "name": "novelty_assessment", + "parent": "theorizer", + "replan": false, + "task": "novelty_assessment" + }, + { + "chain": [], + "id": "theorizer__theory_synthesis", + "inputs": [ + "theory_generation", + "novelty_assessment", + "testability_triage" + ], + "kind": "step", + "mission": "Fan the theorizer in. 
Write theory_report - the focus of the deliverable. Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.", + "name": "theory_synthesis", + "parent": "theorizer", + "replan": false, + "task": "theory_synthesis" + }, + { + "id": "verification", + "kind": "group", + "mission": "One branch per theory that testability_triage marked testable. There is no design step here - the prespecified proposed_test from triage (test, metric, success_threshold) is the commitment that analysis runs and adjudicate checks. The branch count is known only after triage closes, so these branches are created at replan.", + "name": "verification", + "parent": null, + "replan": true + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "verification__analysis", + "inputs": [ + "testability_triage", + "data_driven_discovery", + "evidence_gathering" + ], + "kind": "step", + "mission": "Run the theory's prespecified proposed_test on the data in hand - the source dataset registered by data_driven_discovery, plus any acquired datasets. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.", + "name": "analysis", + "parent": "verification", + "replan": false, + "task": "analysis" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "verification__audit", + "inputs": [ + "analysis" + ], + "kind": "step", + "mission": "Try to refute the verification analysis or find artifacts before its verdict stands. 
Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.", + "name": "audit", + "parent": "verification", + "replan": false, + "task": "audit" + }, + { + "chain": [], + "id": "verification__adjudicate", + "inputs": [ + "testability_triage", + "analysis", + "audit" + ], + "kind": "step", + "mission": "Finalize the theory's outcome (held, partial, failed, underpowered, or n/a) and observed effect size from the analysis and audit, checked against the prespecified success_threshold from triage. Emit an adjudication referencing the theory id.", + "name": "adjudicate", + "parent": "verification", + "replan": false, + "task": "adjudicate" + }, + { + "chain": [], + "id": "verification_synthesis", + "inputs": [ + "verification", + "novelty_assessment" + ], + "kind": "step", + "mission": "Fan the verification branches in. Write verification_report - the novelty-by-verification matrix (each theory's claim, novelty, outcome, effect size, and whether it survived the audit), what each prediction tested on the data in hand, and what could not be tested. Include the verification figure (one panel per theory tested) embedded in the report. Carry any gaps in `gaps`.", + "name": "verification_synthesis", + "parent": null, + "replan": false, + "task": "verification_synthesis" + }, + { + "chain": [], + "id": "gap_synthesis", + "inputs": [ + "provenance_synthesis", + "reproduction_synthesis", + "theory_synthesis", + "verification_synthesis" + ], + "kind": "step", + "mission": "Write data_gaps_report - the standalone gaps deliverable. Aggregate the `gaps` from provenance_report, reproduction_report, theory_report, and verification_report into one ledger (item, missing_data, blocks, severity, and the stage it arose in), and emit next_steps whose kinds may be any flow or task in the taxonomy, not only auto-ds runs. 
This is the single place data and rigor gaps live; the master report only links to it.", + "name": "gap_synthesis", + "parent": null, + "replan": false, + "task": "gap_synthesis" + }, + { + "chain": [], + "id": "final_synthesis", + "inputs": [ + "provenance_synthesis", + "reproduction_synthesis", + "theory_synthesis", + "verification_synthesis", + "gap_synthesis" + ], + "kind": "step", + "mission": "Write research_report - the theory-led master deliverable, focused on the theory runs, not on what was reproduced. Structure - (1) the idea - the cross-cutting mechanism in one paragraph; (2) the theories - theory_highlights by objective, each with its novelty and outcome; (3) does it hold - a brief read of the novelty-by-verification result; (4) what was done - a short provenance list of the pipeline executed; (5) read more - sub_reports linking to the reproduction_report, verification_report, and data_gaps_report. Include the inference_chain from each headline claim back to the auto-ds signal, tensions_and_surprises, the decisive figure embedded in the report, and `links`. 
Do NOT restate the full reproduction ledger (it lives in reproduction_report) or the gaps (they live in data_gaps_report) - reference them.", + "name": "final_synthesis", + "parent": null, + "replan": false, + "task": "final_synthesis" + } + ] + }, + "data_provenance": { + "edges": [ + { + "external": false, + "input": "provenance_search", + "source": "provenance_search", + "target": "provenance_extraction" + }, + { + "external": false, + "input": "provenance_search", + "source": "provenance_search", + "target": "data_acquisition" + }, + { + "external": false, + "input": "provenance_extraction", + "source": "provenance_extraction", + "target": "data_acquisition" + }, + { + "external": false, + "input": "provenance_search", + "source": "provenance_search", + "target": "provenance_synthesis" + }, + { + "external": false, + "input": "provenance_extraction", + "source": "provenance_extraction", + "target": "provenance_synthesis" + }, + { + "external": false, + "input": "data_acquisition", + "source": "data_acquisition", + "target": "provenance_synthesis" + } + ], + "mission": "Source the papers and datasets the auto-ds run was built on. Search the literature for the publication(s) behind the run's datasets, extract their data-availability and repository details (reusing the theorizer extraction schema and its returned findings), acquire the open data so it becomes the data in hand, and record what could not be obtained. This runs before reproduction so the underlying data is sourced rather than assumed.", + "nodes": [ + { + "chain": [ + "asta literature find", + "asta papers search" + ], + "id": "provenance_search", + "inputs": [], + "kind": "step", + "mission": "Read the run's dataset descriptions and intent from its metadata, then search the literature for the paper(s) that published or describe each dataset. 
Emit one data_source per run dataset naming the candidate source paper (paper_id, title, url).", + "name": "provenance_search", + "parent": null, + "replan": false, + "task": "provenance_search" + }, + { + "chain": [ + "asta generate-theories build-extraction-schema", + "asta generate-theories find-and-extract" + ], + "id": "provenance_extraction", + "inputs": [ + "provenance_search" + ], + "kind": "step", + "mission": "Reuse the theorizer extraction (build-extraction-schema, find-and-extract) on the candidate papers - or its already-returned findings if the same papers were extracted there - to pull out each paper's data-availability statement, repository, DOI/accession, and dataset identifiers. Seed `paper_store` with identifier-only entries ({corpus_id}) for the candidate papers and set search_additional_papers false so the corpus is exactly those seeds. Emit one source_access per data_source (keyed by its id); the data_source records themselves are immutable.", + "name": "provenance_extraction", + "parent": null, + "replan": false, + "task": "provenance_extraction" + }, + { + "chain": [ + "asta documents", + "asta autodiscovery upload" + ], + "id": "data_acquisition", + "inputs": [ + "provenance_search", + "provenance_extraction" + ], + "kind": "step", + "mission": "For each openly available source, fetch the data files and register them as a dataset - the data in hand that reproduction, testability_triage, and verification later use. Emit one acquisition per data_source with access_status, local_path, and the registered dataset_id. Validate every fetched dataset against its paper before registering it - n, schema/variables, units, missingness - and record the check in validation_note; a dataset that fails validation is a gap, not an input. 
For restricted or not-found data, record a gap rather than blocking downstream work.", + "name": "data_acquisition", + "parent": null, + "replan": false, + "task": "data_acquisition" + }, + { + "chain": [], + "id": "provenance_synthesis", + "inputs": [ + "provenance_search", + "provenance_extraction", + "data_acquisition" + ], + "kind": "step", + "mission": "Write provenance_report - which papers and datasets were sourced, their access status and local paths, what was acquired and validated, and what could not be obtained (carried in `gaps` for gap_synthesis to aggregate). Put how the sources were matched and the data merged/validated (join key, resulting n vs the run's n) in method_note.", + "name": "provenance_synthesis", + "parent": null, + "replan": false, + "task": "provenance_synthesis" + } + ] + }, + "hypothesis_driven_research": { + "edges": [ + { + "external": false, + "input": "literature_review", + "source": "literature_review", + "target": "hypothesis_formation" + }, + { + "external": false, + "input": "hypothesis_formation", + "source": "hypothesis_formation", + "target": "testing__experiment_design" + }, + { + "external": false, + "input": "literature_review", + "source": "literature_review", + "target": "testing__experiment_design" + }, + { + "external": false, + "input": "experiment_design", + "source": "testing__experiment_design", + "target": "testing__data_acquisition" + }, + { + "external": false, + "input": "experiment_design", + "source": "testing__experiment_design", + "target": "testing__analysis" + }, + { + "external": false, + "input": "data_acquisition", + "source": "testing__data_acquisition", + "target": "testing__analysis" + }, + { + "external": false, + "input": "analysis", + "source": "testing__analysis", + "target": "testing__audit" + }, + { + "external": false, + "input": "experiment_design", + "source": "testing__experiment_design", + "target": "testing__adjudicate" + }, + { + "external": false, + "input": "analysis", + "source": 
"testing__analysis", + "target": "testing__adjudicate" + }, + { + "external": false, + "input": "audit", + "source": "testing__audit", + "target": "testing__adjudicate" + }, + { + "external": false, + "input": "hypothesis_formation", + "source": "hypothesis_formation", + "target": "hypothesis_synthesis" + }, + { + "external": false, + "input": "testing", + "source": "testing", + "target": "hypothesis_synthesis" + } + ], + "mission": "Answer a research question from mission.md the classic way - survey the literature, form explicit falsifiable hypotheses, and run one prespecified test per hypothesis on acquired data. Review, hypothesize, design, test, adjudicate, synthesize.", + "nodes": [ + { + "chain": [ + "asta literature find", + "asta papers search" + ], + "id": "literature_review", + "inputs": [], + "kind": "step", + "mission": "Survey the literature for the mission's question - what is known, what is contested, and which open gaps could be settled by an analysis on obtainable data. Emit key findings (with evidence uuids), the open gaps, and citations.", + "name": "literature_review", + "parent": null, + "replan": false, + "task": "literature_review" + }, + { + "chain": [ + "asta generate-theories build-extraction-schema", + "asta generate-theories find-and-extract" + ], + "id": "hypothesis_formation", + "inputs": [ + "literature_review" + ], + "kind": "step", + "mission": "Form a small set (typically 2-5) of falsifiable hypotheses from the review's open gaps - each a slim claim with its rationale, its falsifiable prediction, and the evidence it rests on. Prefer hypotheses testable on data the literature names. 
The theory machinery can help here - a hypothesis is a slim theory committed to one prediction; seed its `paper_store` with identifier-only entries ({corpus_id}) from the literature_review citations, with search_additional_papers false when the corpus should be exactly those seeds.", + "name": "hypothesis_formation", + "parent": null, + "replan": false, + "task": "hypothesis_formation" + }, + { + "id": "testing", + "kind": "group", + "mission": "One branch per hypothesis (created at replan, once hypothesis_formation has named them). Test that hypothesis end to end.", + "name": "testing", + "parent": null, + "replan": true + }, + { + "chain": [ + "asta experiment" + ], + "id": "testing__experiment_design", + "inputs": [ + "hypothesis_formation", + "literature_review" + ], + "kind": "step", + "mission": "Design the test - operationalization, required data, feasibility - and commit the prespecified test (test, metric, success_threshold) before any data is analyzed. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. What happens next is plan's Gate - feasible/proxy_only branches get data_acquisition (when the design names data not yet in hand), analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a) plus a data_acquisition task holding the gap.", + "name": "experiment_design", + "parent": "testing", + "replan": false, + "task": "experiment_design" + }, + { + "chain": [ + "asta documents", + "asta autodiscovery upload" + ], + "id": "testing__data_acquisition", + "inputs": [ + "experiment_design" + ], + "kind": "step", + "mission": "Fetch the datasets the design requires. 
Validate each against its source (n, schema/variables, units, missingness) and record the check in validation_note; a dataset that fails validation is a gap, not an input.", + "name": "data_acquisition", + "parent": "testing", + "replan": false, + "task": "data_acquisition" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "testing__analysis", + "inputs": [ + "experiment_design", + "data_acquisition" + ], + "kind": "step", + "mission": "Run the prespecified test on the validated data. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.", + "name": "analysis", + "parent": "testing", + "replan": false, + "task": "analysis" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "testing__audit", + "inputs": [ + "analysis" + ], + "kind": "step", + "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.", + "name": "audit", + "parent": "testing", + "replan": false, + "task": "audit" + }, + { + "chain": [], + "id": "testing__adjudicate", + "inputs": [ + "experiment_design", + "analysis", + "audit" + ], + "kind": "step", + "mission": "Finalize the hypothesis's outcome (held, partial, failed, underpowered, or n/a) and observed effect size against the design's prespecified success_threshold, from the analysis and audit. Emit an adjudication referencing the hypothesis id.", + "name": "adjudicate", + "parent": "testing", + "replan": false, + "task": "adjudicate" + }, + { + "chain": [], + "id": "hypothesis_synthesis", + "inputs": [ + "hypothesis_formation", + "testing" + ], + "kind": "step", + "mission": "Fan the branches in. 
Write hypothesis_report - the ledger of hypotheses and their outcomes (joined from the hypotheses and their adjudications), what the verdicts say about the mission's question, the open questions that remain, and any gaps for follow-up work. Include an outcomes/effect-size figure across the hypotheses.", + "name": "hypothesis_synthesis", + "parent": null, + "replan": false, + "task": "hypothesis_synthesis" + } + ] + }, + "reproduction": { + "edges": [ + { + "external": false, + "input": "data_driven_discovery", + "source": "data_driven_discovery", + "target": "law_extraction" + }, + { + "external": false, + "input": "law_extraction", + "source": "law_extraction", + "target": "evidence_gathering" + }, + { + "external": false, + "input": "law_extraction", + "source": "law_extraction", + "target": "replication__experiment_design" + }, + { + "external": false, + "input": "evidence_gathering", + "source": "evidence_gathering", + "target": "replication__experiment_design" + }, + { + "external": false, + "input": "experiment_design", + "source": "replication__experiment_design", + "target": "replication__analysis" + }, + { + "external": false, + "input": "evidence_gathering", + "source": "evidence_gathering", + "target": "replication__analysis" + }, + { + "external": false, + "input": "analysis", + "source": "replication__analysis", + "target": "replication__audit" + }, + { + "external": false, + "input": "experiment_design", + "source": "replication__experiment_design", + "target": "replication__adjudicate" + }, + { + "external": false, + "input": "analysis", + "source": "replication__analysis", + "target": "replication__adjudicate" + }, + { + "external": false, + "input": "audit", + "source": "replication__audit", + "target": "replication__adjudicate" + }, + { + "external": false, + "input": "law_extraction", + "source": "law_extraction", + "target": "reproduction_synthesis" + }, + { + "external": false, + "input": "replication", + "source": "replication", + "target": 
"reproduction_synthesis" + } + ], + "mission": "Ingest an auto-ds run, group its experiments into laws, find independent data once for all of them, then reproduce each law. The verdict is two-axis - outcome (held/partial/failed/underpowered/n-a) crossed with testability (tested/proxy_only/untestable) - and comes from the branch's adjudication, not the ingested run.", + "nodes": [ + { + "chain": [ + "asta autodiscovery run", + "asta autodiscovery experiments" + ], + "id": "data_driven_discovery", + "inputs": [], + "kind": "step", + "mission": "Ingest the run. If the mission names a provided run directory, import it and run no fresh auto-ds (skip `asta autodiscovery run`); otherwise run a fresh one (config n_experiments). Keep the raw experiment nodes as artifacts. Also register the run's own dataset(s) as a dataset entry - this is the \"data in hand\" that testability_triage and verification later test theories against, so it must be a first-class output, not just the run directory. When data_provenance ran first, prefer the datasets it acquired (with their local paths) as the data in hand, falling back to the run's described datasets where acquisition was restricted.", + "name": "data_driven_discovery", + "parent": null, + "replan": false, + "task": "data_driven_discovery" + }, + { + "chain": [], + "id": "law_extraction", + "inputs": [ + "data_driven_discovery" + ], + "kind": "step", + "mission": "Group the experiments into empirical laws. Ground each law in the run's own search signal (surprisal, value, visits, belief_change), and record the construct it claims, how the run measured it, and why these experiments form one law. 
Laws are identity records - their verdicts come later, from each branch's adjudication.", + "name": "law_extraction", + "parent": null, + "replan": false, + "task": "law_extraction" + }, + { + "chain": [ + "asta literature find", + "asta papers search", + "asta documents", + "asta autodiscovery upload" + ], + "id": "evidence_gathering", + "inputs": [ + "law_extraction" + ], + "kind": "step", + "mission": "One comprehensive search across all laws for independent datasets, acquiring what is available. Validate each acquired dataset against its source (n, schema/variables, units, missingness) before registering it; a dataset that fails validation is a gap, not an input. Emit a dataset registry that tags which laws each dataset can test.", + "name": "evidence_gathering", + "parent": null, + "replan": false, + "task": "evidence_gathering" + }, + { + "id": "replication", + "kind": "group", + "mission": "One branch per law (created at replan, once law_extraction has produced the law set). Reproduce that law on the independent data.", + "name": "replication", + "parent": null, + "replan": true + }, + { + "chain": [ + "asta experiment" + ], + "id": "replication__experiment_design", + "inputs": [ + "law_extraction", + "evidence_gathering" + ], + "kind": "step", + "mission": "State the original operationalization, the independent one, and whether they are equivalent or only a proxy. Set feasibility and commit the prespecified test (test, metric, success_threshold) before any analysis runs. When an experiment-designer run informs the design, record its query in experiment_design_query and reference its full recipe_to_implement as an artifact (subtype experiment-design) - never inline it. 
What happens next is plan's Gate, not this step's job - feasible/proxy_only branches get analysis, audit, and adjudicate; data_unavailable/construct_mismatch branches get only adjudicate (outcome n/a, testability untestable) plus a data_acquisition task holding the gap.", + "name": "experiment_design", + "parent": "replication", + "replan": false, + "task": "experiment_design" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "replication__analysis", + "inputs": [ + "experiment_design", + "evidence_gathering" + ], + "kind": "step", + "mission": "Run the reproduction on the acquired data, per the design's prespecified test. Effect size and outcome come from here. Emit at least one figure behind the numbers - DataVoyager returns figures as imageb64, so decode each to a PNG under .asta/analyze-data/<slug>/figures/ and put the path in figure.image (or render your own); list them in `figures` with captions.", + "name": "analysis", + "parent": "replication", + "replan": false, + "task": "analysis" + }, + { + "chain": [ + "asta analyze-data submit", + "asta analyze-data poll" + ], + "id": "replication__audit", + "inputs": [ + "analysis" + ], + "kind": "step", + "mission": "Try to refute the analysis or find artifacts before its verdict stands. Include a negative control - rerun with the predictor shuffled (or equivalent) and confirm the effect disappears.", + "name": "audit", + "parent": "replication", + "replan": false, + "task": "audit" + }, + { + "chain": [], + "id": "replication__adjudicate", + "inputs": [ + "experiment_design", + "analysis", + "audit" + ], + "kind": "step", + "mission": "Finalize the law's two-axis verdict (outcome crossed with testability), independence axes, and observed effect size from the analysis and audit, checked against the design's prespecified success_threshold; or outcome n/a, testability untestable when the branch was infeasible. 
Emit an adjudication referencing the law id - the law record itself is never re-emitted.", + "name": "adjudicate", + "parent": "replication", + "replan": false, + "task": "adjudicate" + }, + { + "chain": [], + "id": "reproduction_synthesis", + "inputs": [ + "law_extraction", + "replication" + ], + "kind": "step", + "mission": "Fan the branches in. Write reproduction_report - the two-axis ledger (each law's outcome crossed with testability, plus effect sizes, independence axes, and evidence, joined from the laws and their adjudications), what held and what failed or was untestable, and a method_note on how the reproduction was done (independent data versus literature cross-check). Include an effect-size comparison figure (source vs observed, one mark per law). Record the rigor gaps from infeasible branches in `gaps` for gap_synthesis to aggregate.", + "name": "reproduction_synthesis", + "parent": null, + "replan": false, + "task": "reproduction_synthesis" + } + ] + }, + "theorizer": { + "edges": [ + { + "external": true, + "input": "law_extraction", + "source": "ext__law_extraction", + "target": "evidence_extraction" + }, + { + "external": true, + "input": "adjudicate", + "source": "ext__adjudicate", + "target": "evidence_extraction" + }, + { + "external": false, + "input": "evidence_extraction", + "source": "evidence_extraction", + "target": "theory_generation__theory_formation" + }, + { + "external": false, + "input": "theory_generation", + "source": "theory_generation", + "target": "testability_triage" + }, + { + "external": true, + "input": "data_driven_discovery", + "source": "ext__data_driven_discovery", + "target": "testability_triage" + }, + { + "external": true, + "input": "evidence_gathering", + "source": "ext__evidence_gathering", + "target": "testability_triage" + }, + { + "external": false, + "input": "testability_triage", + "source": "testability_triage", + "target": "novelty_assessment" + }, + { + "external": false, + "input": "theory_generation", + 
"source": "theory_generation", + "target": "theory_synthesis" + }, + { + "external": false, + "input": "novelty_assessment", + "source": "novelty_assessment", + "target": "theory_synthesis" + }, + { + "external": false, + "input": "testability_triage", + "source": "testability_triage", + "target": "theory_synthesis" + } + ], + "mission": "Theories of the reproduced laws, grounded in both the literature and the reproduction's numbers, generated under two objectives and filtered to what the data on hand can actually test.", + "nodes": [ + { + "chain": [ + "asta generate-theories build-extraction-schema", + "asta generate-theories find-and-extract" + ], + "id": "evidence_extraction", + "inputs": [ + "law_extraction", + "adjudicate" + ], + "kind": "step", + "mission": "Shared across both objective branches. Consume the reproduced laws - the empirical_law records plus the adjudications the replication branches finalized (outcome and testability filled), not the pre-reproduction candidates alone. Build the extraction schema and find-and-extract evidence for them in one pass; this finds the papers and pulls their findings. When upstream steps already identified papers (e.g. provenance data_sources), seed `paper_store` with identifier-only entries ({corpus_id}) - the theorizer and the experiment designer accept the same paper_store payload. Seek disconfirming evidence too, and tag each finding with the law it bears on.", + "name": "evidence_extraction", + "parent": null, + "replan": false, + "task": "evidence_extraction" + }, + { + "id": "theory_generation", + "kind": "group", + "mission": "Two branches over the same shared extraction store, one per generation objective (accuracy_focused, novelty_focused). Both branches are known up front, so they are created together. 
Ground theories in the reproduction's effect sizes and verdicts; populate conflicting_evidence, and make unaccounted_for address the partial and untestable laws.", + "name": "theory_generation", + "parent": null, + "replan": false + }, + { + "chain": [ + "asta generate-theories form-theory" + ], + "id": "theory_generation__theory_formation", + "inputs": [ + "evidence_extraction" + ], + "kind": "step", + "mission": "Form theories from the shared extraction store under this branch's objective.", + "name": "theory_formation", + "parent": "theory_generation", + "replan": false, + "task": "theory_formation" + }, + { + "chain": [], + "id": "testability_triage", + "inputs": [ + "theory_generation", + "data_driven_discovery", + "evidence_gathering" + ], + "kind": "step", + "mission": "Fan both branches in. Compare each theory's required data against the data in hand - the source dataset registered by data_driven_discovery plus any datasets evidence_gathering acquired - and decide which theories are testable now. For each testable theory, commit the prespecified proposed_test (test, metric, success_threshold) that its verification branch will run and adjudicate against. Theories needing new data carry a gap routed to next_steps.", + "name": "testability_triage", + "parent": null, + "replan": false, + "task": "testability_triage" + }, + { + "chain": [ + "asta generate-theories evaluate-novelty" + ], + "id": "novelty_assessment", + "inputs": [ + "testability_triage" + ], + "kind": "step", + "mission": "Stock novelty scoring against the shared corpus, run only on the testable subset of theories.", + "name": "novelty_assessment", + "parent": null, + "replan": false, + "task": "novelty_assessment" + }, + { + "chain": [], + "id": "theory_synthesis", + "inputs": [ + "theory_generation", + "novelty_assessment", + "testability_triage" + ], + "kind": "step", + "mission": "Fan the theorizer in. Write theory_report - the focus of the deliverable. 
Lead with the cross-cutting mechanism, then catalog the theories under each objective (accuracy_focused, novelty_focused) with their grounds_law_ids, novelty, whether they are testable now, and their supporting evidence ids; summarize how novel the set is; list the new_predictions and the open_threads. Carry any data needs in `gaps`.", + "name": "theory_synthesis", + "parent": null, + "replan": false, + "task": "theory_synthesis" + }, + { + "id": "ext__adjudicate", + "kind": "external", + "mission": "", + "name": "adjudicate", + "parent": null, + "replan": false, + "task": "adjudicate" + }, + { + "id": "ext__data_driven_discovery", + "kind": "external", + "mission": "", + "name": "data_driven_discovery", + "parent": null, + "replan": false, + "task": "data_driven_discovery" + }, + { + "id": "ext__evidence_gathering", + "kind": "external", + "mission": "", + "name": "evidence_gathering", + "parent": null, + "replan": false, + "task": "evidence_gathering" + }, + { + "id": "ext__law_extraction", + "kind": "external", + "mission": "", + "name": "law_extraction", + "parent": null, + "replan": false, + "task": "law_extraction" + } + ] + } + }, + "format_version": 1, + "schema_version": 2, + "tasks": { + "adjudicate": { + "output": { + "adjudication": "adjudication", + "artifacts": [ + "artifact" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "adjudication": { + "additionalProperties": true, + "properties": { + "data_used": { + "type": "string" + }, + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "prespecified_check": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, 
+ "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + }, + "testability": { + "enum": [ + "tested", + "proxy_only", + "untestable" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "outcome", + "testability", + "effect_size_observed", + "prespecified_check", + "independence_axes", + "data_used", + "evidence" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/adjudicate.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "adjudication": { + "$ref": "#/$defs/adjudication" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + } + }, + "required": [ + "adjudication", + "artifacts" + ], + "title": "adjudicate", + "type": "object" + } + }, + "analysis": { + "output": { + "analysis": "analysis", + "artifacts": [ + "artifact" + ], + "figures": [ + "figure" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "analysis": { + "additionalProperties": true, + "properties": { + "assumptions": { + "type": "string" + }, + "code": { + "type": "string" + }, + "final_answer": { + "type": "string" + } + }, + "required": [ + "final_answer", + "assumptions", + "code" + ], + 
"type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/analysis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "analysis": { + "$ref": "#/$defs/analysis" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + } + }, + "required": [ + "analysis", + "figures", + "artifacts" + ], + "title": "analysis", + "type": "object" + } + }, + "audit": { + "output": { + "artifacts": [ + "artifact" + ], + "audit_report": "audit_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" 
+ }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "audit_report": { + "additionalProperties": true, + "properties": { + "artifacts_found": { + "items": { + "type": "string" + }, + "type": "array" + }, + "challenges": { + "items": { + "additionalProperties": true, + "properties": { + "check": { + "type": "string" + }, + "concern": { + "type": "string" + }, + "outcome": { + "type": "string" + } + }, + "required": [ + "concern", + "check", + "outcome" + ], + "type": "object" + }, + "type": "array" + }, + "recommended_adjustment": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "verdict_survives": { + "type": "boolean" + } + }, + "required": [ + "subject_id", + "challenges", + "artifacts_found", + "verdict_survives", + "recommended_adjustment" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/audit.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "audit_report": { + "$ref": "#/$defs/audit_report" + } + }, + "required": [ + "audit_report", + "artifacts" + ], + "title": "audit", + "type": "object" + } + }, + "cohort_assembly": { + "output": { + "artifacts": [ + "artifact" + ], + "cohort": "cohort", + "datasets": [ + "dataset" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + 
"type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "cohort": { + "additionalProperties": true, + "properties": { + "discovery_subset": { + "additionalProperties": true, + "properties": { + "definition": { + "type": "string" + }, + "n": { + "type": "number" + }, + "path": { + "type": "string" + } + }, + "required": [ + "definition", + "n", + "path" + ], + "type": "object" + }, + "exclusion_criteria": { + "type": "string" + }, + "holdout_subset": { + "additionalProperties": true, + "properties": { + "definition": { + "type": "string" + }, + "n": { + "type": "number" + }, + "path": { + "type": "string" + } + }, + "required": [ + "definition", + "n", + "path" + ], + "type": "object" + }, + "id": { + "type": "string" + }, + "inclusion_criteria": { + "type": "string" + }, + "research_question": { + "type": "string" + }, + "run_id": { + "type": "string" + }, + "sampling": { + "type": "string" + }, + "source_data_sources": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "research_question", + "inclusion_criteria", + "exclusion_criteria", + "sampling", + "source_data_sources", + "discovery_subset", + "holdout_subset", + "run_id" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + 
"properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/cohort_assembly.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "cohort": { + "$ref": "#/$defs/cohort" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "cohort", + "datasets", + "artifacts" + ], + "title": "cohort_assembly", + "type": "object" + } + }, + "data_acquisition": { + "output": { + "acquisitions": [ + "acquisition" + ], + "artifacts": [ + "artifact" + ], + "datasets": [ + "dataset" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "acquisition": { + "additionalProperties": true, + "properties": { + "access_status": { + "enum": [ + "acquired", + "open_unfetched", + "restricted", + "not_found" + ] + }, + "data_source_id": { + "type": "string" + }, + "dataset_id": { + "type": "string" + }, + "local_path": { + "type": "string" + }, + "validation_note": { + "type": "string" + } + }, + "required": [ + "data_source_id", + "access_status", + "local_path", + "dataset_id", + "validation_note" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": 
{ + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/data_acquisition.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "acquisitions": { + "items": { + "$ref": "#/$defs/acquisition" + }, + "type": "array" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "acquisitions", + "datasets", + "artifacts" + ], + "title": "data_acquisition", + "type": "object" + } + }, + "data_driven_discovery": { + "output": { + "artifacts": [ + "artifact" + ], + "datasets": [ + "dataset" + ], + "experiments": [ + "experiment" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + 
"dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "experiment": { + "additionalProperties": true, + "properties": { + "analysis": { + "type": "string" + }, + "experiment_id": { + "type": "string" + }, + "hypothesis": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "required": [ + "experiment_id", + "status", + "hypothesis", + "analysis" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/data_driven_discovery.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + }, + "experiments": { + "items": { + "$ref": "#/$defs/experiment" + }, + "type": "array" + } + }, + "required": [ + "experiments", + "datasets", + "artifacts" + ], + "title": "data_driven_discovery", + "type": "object" + } + }, + "discovery_run": { + "output": { + "artifacts": [ + "artifact" + ], + "empirical_laws": [ + "empirical_law" + ], + "experiments": [ + "experiment" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + 
"properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "empirical_law": { + "additionalProperties": true, + "properties": { + "construct": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + "grouping_rationale": { + "type": "string" + }, + "id": { + "type": "string" + }, + "mcts_provenance": { + "additionalProperties": true, + "properties": { + "is_surprising": { + "type": "boolean" + }, + "posterior_belief": { + "type": "object" + }, + "prior_belief": { + "type": "object" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "surprise", + "is_surprising", + "prior_belief", + "posterior_belief" + ], + "type": "object" + }, + "source_node": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "construct", + "source_operationalization", + "source_node", + "effect_size_source", + "grouping_rationale" + ], + "type": "object" + }, + "experiment": { + "additionalProperties": true, + "properties": { + "analysis": { + "type": "string" + }, + "experiment_id": { + "type": "string" + }, + "hypothesis": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "required": [ + "experiment_id", + "status", + "hypothesis", + "analysis" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/discovery_run.schema.json", + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "empirical_laws": { + "items": { + "$ref": "#/$defs/empirical_law" + }, + "type": "array" + }, + "experiments": { + "items": { + "$ref": "#/$defs/experiment" + }, + "type": "array" + } + }, + "required": [ + "experiments", + "empirical_laws", + "artifacts" + ], + "title": "discovery_run", + "type": "object" + } + }, + "discovery_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "discovery_report": "discovery_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "discovery_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "interpretation": { + "type": "string" + }, + "laws": { + "items": { + "additionalProperties": true, + "properties": { + "deciding_experiment": { + "type": "string" + }, + 
"effect_size_discovery": { + "type": "string" + }, + "effect_size_holdout": { + "type": "string" + }, + "law_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "law_id", + "statement", + "surprise", + "outcome", + "deciding_experiment", + "effect_size_discovery", + "effect_size_holdout" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "next_steps": { + "items": { + "$ref": "#/$defs/next_run_proposal" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "run_id": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "run_id", + "laws", + "interpretation", + "next_steps", + "figures", + "gaps", + "links" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "next_run_proposal": { + "additionalProperties": true, + "properties": { + "data_needed": { + "type": "string" + }, + "expected_signature": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "priority": { + "enum": [ + "high", + "medium", + "low" + ] + }, + "tests": { + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "kind", + "title", + "tests", + "data_needed", + "expected_signature", + "priority" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + 
"type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/discovery_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "discovery_report": { + "$ref": "#/$defs/discovery_report" + } + }, + "required": [ + "discovery_report", + "artifacts" + ], + "title": "discovery_synthesis", + "type": "object" + } + }, + "evidence_extraction": { + "output": { + "artifacts": [ + "artifact" + ], + "extracted_data": "extracted_data" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "extracted_data": { + "additionalProperties": true, + "properties": { + "extraction_schema_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "rows": { + "items": { + "additionalProperties": true, + "properties": { + "brief_description": { + "type": "string" + }, + "citation_title": { + "type": "string" + }, + "name_full": { + "type": "string" + }, + "name_short": { + "type": "string" + }, + "uuid": { + "type": "string" + } + }, + "required": [ + "name_short", + "name_full", + "brief_description", + "citation_title", + "uuid" + ], + "type": "object" + }, + "type": "array" + }, + "run_id": { + "type": "string" + } + }, + "required": [ + "id", + "run_id", + "paper_id", + 
"extraction_schema_id", + "rows" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/evidence_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "extracted_data": { + "$ref": "#/$defs/extracted_data" + } + }, + "required": [ + "extracted_data", + "artifacts" + ], + "title": "evidence_extraction", + "type": "object" + } + }, + "evidence_gathering": { + "output": { + "artifacts": [ + "artifact" + ], + "datasets": [ + "dataset" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "dataset": { + "additionalProperties": true, + "properties": { + "covers_laws": { + "items": { + "type": "string" + }, + "type": "array" + }, + "definition": { + "type": "string" + }, + "id": { + "type": "string" + }, + "n": { + "type": "number" + }, + "sampling": { + "type": "string" + }, + "source": { + "type": "string" + }, + "variables": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "definition", + "source", + "n", + "sampling", + "variables", + "covers_laws" + ], + "type": "object" + }, + "part": { + 
"additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/evidence_gathering.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "datasets": { + "items": { + "$ref": "#/$defs/dataset" + }, + "type": "array" + } + }, + "required": [ + "datasets", + "artifacts" + ], + "title": "evidence_gathering", + "type": "object" + } + }, + "experiment_design": { + "output": { + "artifacts": [ + "artifact" + ], + "experiment_design": "experiment_design" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "experiment_design": { + "additionalProperties": true, + "properties": { + "construct_equivalence": { + "enum": [ + "equivalent", + "proxy", + "mismatch" + ] + }, + "data_gap": { + "type": "string" + }, + "experiment_design_query": { + "type": "string" + }, + "experiment_name": { + "type": "string" + }, + "feasibility": { + "enum": [ + "feasible", + "proxy_only", + "data_unavailable", + "construct_mismatch" + ] + }, + "independent_operationalization": { + "type": "string" + }, + "plain_language_description": { + "type": "string" + }, + "prespecified": { + "additionalProperties": true, + "properties": { + "metric": 
{ + "type": "string" + }, + "success_threshold": { + "type": "string" + }, + "test": { + "type": "string" + } + }, + "required": [ + "test", + "metric", + "success_threshold" + ], + "type": "object" + }, + "required_data": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "experiment_name", + "plain_language_description", + "source_operationalization", + "independent_operationalization", + "construct_equivalence", + "feasibility", + "required_data", + "data_gap", + "experiment_design_query", + "prespecified" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/experiment_design.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "experiment_design": { + "$ref": "#/$defs/experiment_design" + } + }, + "required": [ + "experiment_design", + "artifacts" + ], + "title": "experiment_design", + "type": "object" + } + }, + "final_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "research_report": "research_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": 
"#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "research_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "inference_chain": { + "items": { + "additionalProperties": true, + "properties": { + "chain": { + "items": { + "type": "string" + }, + "type": "array" + }, + "claim": { + "type": "string" + } + }, + "required": [ + "claim", + "chain" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "mechanism": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + }, + "grounded_in": { + "items": { + "type": "string" + }, + "type": "array" + }, + "statement": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "statement", + "grounded_in", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "report_path": { + "type": "string" + }, + "sub_reports": { + "items": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "one_line": { + "type": "string" + }, + "report_path": { + "type": 
"string" + } + }, + "required": [ + "kind", + "report_path", + "one_line" + ], + "type": "object" + }, + "type": "array" + }, + "tensions_and_surprises": { + "items": { + "additionalProperties": true, + "properties": { + "evidence": { + "type": "string" + }, + "observation": { + "type": "string" + }, + "where": { + "type": "string" + } + }, + "required": [ + "observation", + "where", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "theory_highlights": { + "items": { + "additionalProperties": true, + "properties": { + "claim": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "claim", + "novelty", + "outcome" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + }, + "what_was_done": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "report_path", + "title", + "headline", + "mechanism", + "theory_highlights", + "inference_chain", + "what_was_done", + "sub_reports", + "tensions_and_surprises", + "figures", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/final_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "research_report": { + "$ref": "#/$defs/research_report" + } + }, + "required": [ + "research_report", + "artifacts" + ], + "title": "final_synthesis", + "type": "object" + } + }, + "gap_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "data_gaps_report": "data_gaps_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + 
"additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "data_gaps_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "arose_in": { + "type": "string" + }, + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity", + "arose_in" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "next_steps": { + "items": { + "$ref": "#/$defs/next_run_proposal" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "gaps", + "next_steps", + "figures", + "links" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "next_run_proposal": { + "additionalProperties": true, + "properties": { + "data_needed": { + "type": "string" + }, + 
"expected_signature": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "priority": { + "enum": [ + "high", + "medium", + "low" + ] + }, + "tests": { + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "kind", + "title", + "tests", + "data_needed", + "expected_signature", + "priority" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/gap_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "data_gaps_report": { + "$ref": "#/$defs/data_gaps_report" + } + }, + "required": [ + "data_gaps_report", + "artifacts" + ], + "title": "gap_synthesis", + "type": "object" + } + }, + "holdout_replication": { + "output": { + "adjudication": "adjudication", + "artifacts": [ + "artifact" + ], + "figures": [ + "figure" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "adjudication": { + "additionalProperties": true, + "properties": { + "data_used": { + "type": "string" + }, + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "prespecified_check": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + }, + "testability": { + "enum": [ + "tested", + 
"proxy_only", + "untestable" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "outcome", + "testability", + "effect_size_observed", + "prespecified_check", + "independence_axes", + "data_used", + "evidence" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/holdout_replication.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "adjudication": { + "$ref": "#/$defs/adjudication" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + } + }, + "required": [ + "adjudication", + "figures", + "artifacts" + ], + "title": "holdout_replication", + "type": "object" + } + }, + "hypothesis_formation": { + "output": { + "artifacts": [ + "artifact" + ], + "hypotheses": [ + "hypothesis" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + 
"artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "hypothesis": { + "additionalProperties": true, + "properties": { + "falsifiable_prediction": { + "type": "string" + }, + "grounds": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "id": { + "type": "string" + }, + "rationale": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "rationale", + "falsifiable_prediction", + "grounds" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/hypothesis_formation.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "hypotheses": { + "items": { + "$ref": "#/$defs/hypothesis" + }, + "type": "array" + } + }, + "required": [ + "hypotheses", + "artifacts" + ], + "title": "hypothesis_formation", + "type": "object" + } + }, + "hypothesis_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "hypothesis_report": "hypothesis_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + 
"additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "hypothesis_report": { + "additionalProperties": true, + "properties": { + "answer": { + "type": "string" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "ledger": { + "items": { + "additionalProperties": true, + "properties": { + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "hypothesis_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + } + }, + "required": [ + "hypothesis_id", + "statement", + "outcome", + "effect_size_observed", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + 
], + "type": "object" + }, + "type": "array" + }, + "open_questions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "question": { + "type": "string" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "question", + "ledger", + "answer", + "open_questions", + "figures", + "gaps", + "links" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/hypothesis_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "hypothesis_report": { + "$ref": "#/$defs/hypothesis_report" + } + }, + "required": [ + "hypothesis_report", + "artifacts" + ], + "title": "hypothesis_synthesis", + "type": "object" + } + }, + "law_extraction": { + "output": { + "artifacts": [ + "artifact" + ], + "empirical_laws": [ + "empirical_law" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "empirical_law": { + "additionalProperties": true, + "properties": { + "construct": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + 
"grouping_rationale": { + "type": "string" + }, + "id": { + "type": "string" + }, + "mcts_provenance": { + "additionalProperties": true, + "properties": { + "is_surprising": { + "type": "boolean" + }, + "posterior_belief": { + "type": "object" + }, + "prior_belief": { + "type": "object" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "surprise", + "is_surprising", + "prior_belief", + "posterior_belief" + ], + "type": "object" + }, + "source_node": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "construct", + "source_operationalization", + "source_node", + "effect_size_source", + "grouping_rationale" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/law_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "empirical_laws": { + "items": { + "$ref": "#/$defs/empirical_law" + }, + "type": "array" + } + }, + "required": [ + "empirical_laws", + "artifacts" + ], + "title": "law_extraction", + "type": "object" + } + }, + "literature_review": { + "output": { + "artifacts": [ + "artifact" + ], + "literature_review": "literature_review" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + 
"parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "literature_review": { + "additionalProperties": true, + "properties": { + "citations": { + "items": { + "additionalProperties": true, + "properties": { + "corpus_id": { + "type": "number" + }, + "id": { + "type": "string" + }, + "relevance": { + "type": "string" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "id", + "corpus_id", + "title", + "url", + "relevance" + ], + "type": "object" + }, + "type": "array" + }, + "key_findings": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "open_gaps": { + "items": { + "type": "string" + }, + "type": "array" + }, + "summary": { + "type": "string" + } + }, + "required": [ + "summary", + "key_findings", + "open_gaps", + "citations" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/literature_review.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "literature_review": { + "$ref": "#/$defs/literature_review" + } + }, + "required": [ + "literature_review", + "artifacts" + ], + "title": "literature_review", + "type": "object" + } + }, + "novelty_assessment": { + "output": { + "artifacts": [ + "artifact" + ], + "theory_evaluations": [ + "theory_evaluation" + ] + }, + "schema": { + "$comment": "generated by 
scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory_evaluation": { + "additionalProperties": true, + "properties": { + "explanation": { + "type": "string" + }, + "id": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "overall_support": { + "enum": [ + "supports", + "mixed", + "contradicts", + "inconclusive" + ] + }, + "overall_support_raw": { + "type": "string" + }, + "statement_evaluations": { + "items": { + "additionalProperties": true, + "properties": { + "explanation": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "statement_index": { + "type": "number" + } + }, + "required": [ + "statement_index", + "novelty", + "explanation" + ], + "type": "object" + }, + "type": "array" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "id", + "theory_id", + "novelty", + "overall_support", + "explanation", + "statement_evaluations" + ], + "type": "object" + } + }, + "$id": "asta-research-step/novelty_assessment.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, 
+ "theory_evaluations": { + "items": { + "$ref": "#/$defs/theory_evaluation" + }, + "type": "array" + } + }, + "required": [ + "theory_evaluations", + "artifacts" + ], + "title": "novelty_assessment", + "type": "object" + } + }, + "provenance_extraction": { + "output": { + "artifacts": [ + "artifact" + ], + "extracted_data": "extracted_data", + "source_access": [ + "source_access" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "extracted_data": { + "additionalProperties": true, + "properties": { + "extraction_schema_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "rows": { + "items": { + "additionalProperties": true, + "properties": { + "brief_description": { + "type": "string" + }, + "citation_title": { + "type": "string" + }, + "name_full": { + "type": "string" + }, + "name_short": { + "type": "string" + }, + "uuid": { + "type": "string" + } + }, + "required": [ + "name_short", + "name_full", + "brief_description", + "citation_title", + "uuid" + ], + "type": "object" + }, + "type": "array" + }, + "run_id": { + "type": "string" + } + }, + "required": [ + "id", + "run_id", + "paper_id", + "extraction_schema_id", + "rows" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + 
"source_access": { + "additionalProperties": true, + "properties": { + "data_availability": { + "type": "string" + }, + "data_source_id": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "repository": { + "type": "string" + } + }, + "required": [ + "data_source_id", + "data_availability", + "repository", + "identifier" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "extracted_data": { + "$ref": "#/$defs/extracted_data" + }, + "source_access": { + "items": { + "$ref": "#/$defs/source_access" + }, + "type": "array" + } + }, + "required": [ + "extracted_data", + "source_access", + "artifacts" + ], + "title": "provenance_extraction", + "type": "object" + } + }, + "provenance_search": { + "output": { + "artifacts": [ + "artifact" + ], + "data_sources": [ + "data_source" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "data_source": { + "additionalProperties": true, + "properties": { + "dataset_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "paper_title": { + "type": "string" + }, + "paper_url": { + "type": "string" + } + }, + "required": [ + "id", + "dataset_id", + "paper_id", + 
"paper_title", + "paper_url" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_search.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "data_sources": { + "items": { + "$ref": "#/$defs/data_source" + }, + "type": "array" + } + }, + "required": [ + "data_sources", + "artifacts" + ], + "title": "provenance_search", + "type": "object" + } + }, + "provenance_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "provenance_report": "provenance_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "provenance_report": { + "additionalProperties": true, + "properties": { + "acquired": { + "items": { + "type": "string" + }, + 
"type": "array" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "method_note": { + "type": "string" + }, + "not_acquired": { + "items": { + "type": "string" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "sources": { + "items": { + "additionalProperties": true, + "properties": { + "access_status": { + "enum": [ + "acquired", + "open_unfetched", + "restricted", + "not_found" + ] + }, + "dataset_id": { + "type": "string" + }, + "local_path": { + "type": "string" + }, + "paper_title": { + "type": "string" + }, + "paper_url": { + "type": "string" + }, + "repository": { + "type": "string" + } + }, + "required": [ + "dataset_id", + "paper_title", + "paper_url", + "repository", + "access_status", + "local_path" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "sources", + "method_note", + "acquired", + "not_acquired", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + 
"provenance_report": { + "$ref": "#/$defs/provenance_report" + } + }, + "required": [ + "provenance_report", + "artifacts" + ], + "title": "provenance_synthesis", + "type": "object" + } + }, + "reproduction_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "reproduction_report": "reproduction_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "reproduction_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "laws_ledger": { + "items": { + "additionalProperties": true, + 
"properties": { + "effect_size_observed": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "law_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + }, + "testability": { + "enum": [ + "tested", + "proxy_only", + "untestable" + ] + } + }, + "required": [ + "law_id", + "statement", + "outcome", + "testability", + "effect_size_source", + "effect_size_observed", + "independence_axes", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "method_note": { + "type": "string" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + }, + "what_failed_or_untestable": { + "items": { + "type": "string" + }, + "type": "array" + }, + "what_held": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "report_path", + "title", + "headline", + "method_note", + "laws_ledger", + "what_held", + "what_failed_or_untestable", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/reproduction_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "reproduction_report": { + "$ref": "#/$defs/reproduction_report" + } + }, + "required": [ + "reproduction_report", + "artifacts" + ], + "title": "reproduction_synthesis", + "type": "object" + } 
+ }, + "testability_triage": { + "output": { + "artifacts": [ + "artifact" + ], + "testability_triage": "testability_triage" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "testability_triage": { + "additionalProperties": true, + "properties": { + "assessments": { + "items": { + "additionalProperties": true, + "properties": { + "available_data": { + "type": "string" + }, + "gap": { + "type": "string" + }, + "proposed_test": { + "additionalProperties": true, + "properties": { + "metric": { + "type": "string" + }, + "success_threshold": { + "type": "string" + }, + "test": { + "type": "string" + } + }, + "required": [ + "test", + "metric", + "success_threshold" + ], + "type": "object" + }, + "required_data": { + "type": "string" + }, + "testable_now": { + "type": "boolean" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "testable_now", + "available_data", + "required_data", + "proposed_test", + "gap" + ], + "type": "object" + }, + "type": "array" + }, + "testable_theory_ids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "assessments", + "testable_theory_ids" + ], + "type": "object" + } + }, + "$id": 
"asta-research-step/testability_triage.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "testability_triage": { + "$ref": "#/$defs/testability_triage" + } + }, + "required": [ + "testability_triage", + "artifacts" + ], + "title": "testability_triage", + "type": "object" + } + }, + "theory_formation": { + "output": { + "artifacts": [ + "artifact" + ], + "theories": [ + "theory" + ] + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory": { + "additionalProperties": true, + "properties": { + "components": { + "additionalProperties": true, + "properties": { + "generation_objective": { + "type": "string" + }, + "new_predictions_likely": { + "items": { + "type": "string" + }, + "type": "array" + }, + "new_predictions_unknown": { + "items": { + "type": "string" + }, + "type": "array" + }, + "theory_statements": { + "items": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": 
"array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "statement_name": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "theory_statement": { + "type": "string" + } + }, + "required": [ + "statement_name", + "theory_statement", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "type": "array" + }, + "unaccounted_for": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": [ + "generation_objective", + "theory_statements", + "new_predictions_likely", + "new_predictions_unknown", + "unaccounted_for" + ], + "type": "object" + }, + "description": { + "type": "string" + }, + "grounds_law_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "objective": { + "enum": [ + "accuracy_focused", + "novelty_focused" + ] + }, + "supporting_evidence_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "theory_query": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "description", + "theory_query", + "objective", + "grounds_law_ids", + "supporting_evidence_ids", + "components" + ], + "type": "object" + } + }, + "$id": "asta-research-step/theory_formation.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "theories": { + "items": 
{ + "$ref": "#/$defs/theory" + }, + "type": "array" + } + }, + "required": [ + "theories", + "artifacts" + ], + "title": "theory_formation", + "type": "object" + } + }, + "theory_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "theory_report": "theory_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": 
{ + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "mechanism": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + }, + "grounded_in": { + "items": { + "type": "string" + }, + "type": "array" + }, + "statement": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "statement", + "grounded_in", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "new_predictions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "novelty_summary": { + "type": "string" + }, + "open_threads": { + "items": { + "type": "string" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "theories": { + "items": { + "additionalProperties": true, + "properties": { + "grounds_law_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "objective": { + "enum": [ + "accuracy_focused", + "novelty_focused" + ] + }, + "one_line": { + "type": "string" + }, + "supporting_evidence_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "testable_now": { + "type": "boolean" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "name", + "objective", + "one_line", + "grounds_law_ids", + "novelty", + "testable_now", + "supporting_evidence_ids" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "mechanism", + "theories", + "novelty_summary", + "new_predictions", + "open_threads", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/theory_synthesis.schema.json", + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "theory_report": { + "$ref": "#/$defs/theory_report" + } + }, + "required": [ + "theory_report", + "artifacts" + ], + "title": "theory_synthesis", + "type": "object" + } + }, + "verification_synthesis": { + "output": { + "artifacts": [ + "artifact" + ], + "verification_report": "verification_report" + }, + "schema": { + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "verification_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + 
"severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "novelty_by_verification": { + "items": { + "additionalProperties": true, + "properties": { + "audit_survived": { + "type": "boolean" + }, + "claim": { + "type": "string" + }, + "data_used": { + "type": "string" + }, + "effect_size": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "claim", + "novelty", + "outcome", + "effect_size", + "data_used", + "audit_survived" + ], + "type": "object" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + }, + "what_could_not_be_tested": { + "items": { + "type": "string" + }, + "type": "array" + }, + "what_was_tested": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "novelty_by_verification", + "what_was_tested", + "what_could_not_be_tested", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/verification_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "verification_report": { + "$ref": "#/$defs/verification_report" + } + }, + "required": [ + "verification_report", + "artifacts" + ], + "title": "verification_synthesis", + "type": "object" + } + } + } +} diff --git a/skills/research-step/assets/compiled/gap_synthesis.schema.json 
b/skills/research-step/assets/compiled/gap_synthesis.schema.json new file mode 100644 index 0000000..760fbb5 --- /dev/null +++ b/skills/research-step/assets/compiled/gap_synthesis.schema.json @@ -0,0 +1,221 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "data_gaps_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "arose_in": { + "type": "string" + }, + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity", + "arose_in" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "next_steps": { + "items": { + "$ref": "#/$defs/next_run_proposal" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "gaps", + "next_steps", + "figures", + "links" + ], + "type": "object" + }, 
+ "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "next_run_proposal": { + "additionalProperties": true, + "properties": { + "data_needed": { + "type": "string" + }, + "expected_signature": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "priority": { + "enum": [ + "high", + "medium", + "low" + ] + }, + "tests": { + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "kind", + "title", + "tests", + "data_needed", + "expected_signature", + "priority" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/gap_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "data_gaps_report": { + "$ref": "#/$defs/data_gaps_report" + } + }, + "required": [ + "data_gaps_report", + "artifacts" + ], + "title": "gap_synthesis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/holdout_replication.schema.json b/skills/research-step/assets/compiled/holdout_replication.schema.json new file mode 100644 index 0000000..9d18252 --- /dev/null +++ b/skills/research-step/assets/compiled/holdout_replication.schema.json @@ -0,0 +1,167 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "adjudication": { + "additionalProperties": true, + "properties": { + "data_used": { + "type": "string" + }, + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + 
"independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "prespecified_check": { + "type": "string" + }, + "subject_id": { + "type": "string" + }, + "subject_kind": { + "enum": [ + "empirical_law", + "theory", + "hypothesis" + ] + }, + "testability": { + "enum": [ + "tested", + "proxy_only", + "untestable" + ] + } + }, + "required": [ + "subject_kind", + "subject_id", + "outcome", + "testability", + "effect_size_observed", + "prespecified_check", + "independence_axes", + "data_used", + "evidence" + ], + "type": "object" + }, + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/holdout_replication.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "adjudication": { + "$ref": "#/$defs/adjudication" + }, + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "figures": { + "items": { + "$ref": 
"#/$defs/figure" + }, + "type": "array" + } + }, + "required": [ + "adjudication", + "figures", + "artifacts" + ], + "title": "holdout_replication", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/hypothesis_formation.schema.json b/skills/research-step/assets/compiled/hypothesis_formation.schema.json new file mode 100644 index 0000000..694d94f --- /dev/null +++ b/skills/research-step/assets/compiled/hypothesis_formation.schema.json @@ -0,0 +1,126 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "hypothesis": { + "additionalProperties": true, + "properties": { + "falsifiable_prediction": { + "type": "string" + }, + "grounds": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "id": { + "type": "string" + }, + "rationale": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "rationale", + "falsifiable_prediction", + "grounds" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/hypothesis_formation.schema.json", + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "hypotheses": { + "items": { + "$ref": "#/$defs/hypothesis" + }, + "type": "array" + } + }, + "required": [ + "hypotheses", + "artifacts" + ], + "title": "hypothesis_formation", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json new file mode 100644 index 0000000..b2fe767 --- /dev/null +++ b/skills/research-step/assets/compiled/hypothesis_synthesis.schema.json @@ -0,0 +1,224 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "hypothesis_report": { + "additionalProperties": true, + "properties": { + "answer": { + "type": "string" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + 
"missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "ledger": { + "items": { + "additionalProperties": true, + "properties": { + "effect_size_observed": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "hypothesis_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + } + }, + "required": [ + "hypothesis_id", + "statement", + "outcome", + "effect_size_observed", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "open_questions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "question": { + "type": "string" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "question", + "ledger", + "answer", + "open_questions", + "figures", + "gaps", + "links" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/hypothesis_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "hypothesis_report": { + "$ref": "#/$defs/hypothesis_report" + } + }, + "required": [ + "hypothesis_report", + "artifacts" + ], + "title": "hypothesis_synthesis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/law_extraction.schema.json 
b/skills/research-step/assets/compiled/law_extraction.schema.json new file mode 100644 index 0000000..7b3e1fc --- /dev/null +++ b/skills/research-step/assets/compiled/law_extraction.schema.json @@ -0,0 +1,139 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "empirical_law": { + "additionalProperties": true, + "properties": { + "construct": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + "grouping_rationale": { + "type": "string" + }, + "id": { + "type": "string" + }, + "mcts_provenance": { + "additionalProperties": true, + "properties": { + "is_surprising": { + "type": "boolean" + }, + "posterior_belief": { + "type": "object" + }, + "prior_belief": { + "type": "object" + }, + "surprise": { + "type": "number" + } + }, + "required": [ + "surprise", + "is_surprising", + "prior_belief", + "posterior_belief" + ], + "type": "object" + }, + "source_node": { + "type": "string" + }, + "source_operationalization": { + "type": "string" + }, + "statement": { + "type": "string" + } + }, + "required": [ + "id", + "statement", + "construct", + "source_operationalization", + "source_node", + "effect_size_source", + "grouping_rationale" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": 
"asta-research-step/law_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "empirical_laws": { + "items": { + "$ref": "#/$defs/empirical_law" + }, + "type": "array" + } + }, + "required": [ + "empirical_laws", + "artifacts" + ], + "title": "law_extraction", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/literature_review.schema.json b/skills/research-step/assets/compiled/literature_review.schema.json new file mode 100644 index 0000000..14df7b7 --- /dev/null +++ b/skills/research-step/assets/compiled/literature_review.schema.json @@ -0,0 +1,150 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "literature_review": { + "additionalProperties": true, + "properties": { + "citations": { + "items": { + "additionalProperties": true, + "properties": { + "corpus_id": { + "type": "number" + }, + "id": { + "type": "string" + }, + "relevance": { + "type": "string" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "id", + "corpus_id", + "title", + "url", + "relevance" + ], + "type": "object" + }, + "type": "array" + }, + "key_findings": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + 
"type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "open_gaps": { + "items": { + "type": "string" + }, + "type": "array" + }, + "summary": { + "type": "string" + } + }, + "required": [ + "summary", + "key_findings", + "open_gaps", + "citations" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/literature_review.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "literature_review": { + "$ref": "#/$defs/literature_review" + } + }, + "required": [ + "literature_review", + "artifacts" + ], + "title": "literature_review", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/novelty_assessment.schema.json b/skills/research-step/assets/compiled/novelty_assessment.schema.json new file mode 100644 index 0000000..729f9fe --- /dev/null +++ b/skills/research-step/assets/compiled/novelty_assessment.schema.json @@ -0,0 +1,147 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + 
"type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory_evaluation": { + "additionalProperties": true, + "properties": { + "explanation": { + "type": "string" + }, + "id": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "overall_support": { + "enum": [ + "supports", + "mixed", + "contradicts", + "inconclusive" + ] + }, + "overall_support_raw": { + "type": "string" + }, + "statement_evaluations": { + "items": { + "additionalProperties": true, + "properties": { + "explanation": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "statement_index": { + "type": "number" + } + }, + "required": [ + "statement_index", + "novelty", + "explanation" + ], + "type": "object" + }, + "type": "array" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "id", + "theory_id", + "novelty", + "overall_support", + "explanation", + "statement_evaluations" + ], + "type": "object" + } + }, + "$id": "asta-research-step/novelty_assessment.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "theory_evaluations": { + "items": { + "$ref": "#/$defs/theory_evaluation" + }, + "type": "array" + } + }, + "required": [ + "theory_evaluations", + "artifacts" + ], + "title": "novelty_assessment", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/provenance_extraction.schema.json b/skills/research-step/assets/compiled/provenance_extraction.schema.json new file mode 100644 index 0000000..2bd4ea8 --- /dev/null +++ b/skills/research-step/assets/compiled/provenance_extraction.schema.json @@ -0,0 +1,163 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + 
"additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "extracted_data": { + "additionalProperties": true, + "properties": { + "extraction_schema_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "rows": { + "items": { + "additionalProperties": true, + "properties": { + "brief_description": { + "type": "string" + }, + "citation_title": { + "type": "string" + }, + "name_full": { + "type": "string" + }, + "name_short": { + "type": "string" + }, + "uuid": { + "type": "string" + } + }, + "required": [ + "name_short", + "name_full", + "brief_description", + "citation_title", + "uuid" + ], + "type": "object" + }, + "type": "array" + }, + "run_id": { + "type": "string" + } + }, + "required": [ + "id", + "run_id", + "paper_id", + "extraction_schema_id", + "rows" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "source_access": { + "additionalProperties": true, + "properties": { + "data_availability": { + "type": "string" + }, + "data_source_id": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "repository": { + "type": "string" + } + }, + "required": [ + "data_source_id", + "data_availability", + "repository", + "identifier" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_extraction.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + 
"properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "extracted_data": { + "$ref": "#/$defs/extracted_data" + }, + "source_access": { + "items": { + "$ref": "#/$defs/source_access" + }, + "type": "array" + } + }, + "required": [ + "extracted_data", + "source_access", + "artifacts" + ], + "title": "provenance_extraction", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/provenance_search.schema.json b/skills/research-step/assets/compiled/provenance_search.schema.json new file mode 100644 index 0000000..8a924d9 --- /dev/null +++ b/skills/research-step/assets/compiled/provenance_search.schema.json @@ -0,0 +1,107 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "data_source": { + "additionalProperties": true, + "properties": { + "dataset_id": { + "type": "string" + }, + "id": { + "type": "string" + }, + "paper_id": { + "type": "string" + }, + "paper_title": { + "type": "string" + }, + "paper_url": { + "type": "string" + } + }, + "required": [ + "id", + "dataset_id", + "paper_id", + "paper_title", + "paper_url" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_search.schema.json", + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "data_sources": { + "items": { + "$ref": "#/$defs/data_source" + }, + "type": "array" + } + }, + "required": [ + "data_sources", + "artifacts" + ], + "title": "provenance_search", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/provenance_synthesis.schema.json b/skills/research-step/assets/compiled/provenance_synthesis.schema.json new file mode 100644 index 0000000..0d43a6f --- /dev/null +++ b/skills/research-step/assets/compiled/provenance_synthesis.schema.json @@ -0,0 +1,230 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "provenance_report": { + "additionalProperties": true, + "properties": { + "acquired": { + "items": { + "type": "string" + }, + "type": "array" + }, + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + 
"properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "method_note": { + "type": "string" + }, + "not_acquired": { + "items": { + "type": "string" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "sources": { + "items": { + "additionalProperties": true, + "properties": { + "access_status": { + "enum": [ + "acquired", + "open_unfetched", + "restricted", + "not_found" + ] + }, + "dataset_id": { + "type": "string" + }, + "local_path": { + "type": "string" + }, + "paper_title": { + "type": "string" + }, + "paper_url": { + "type": "string" + }, + "repository": { + "type": "string" + } + }, + "required": [ + "dataset_id", + "paper_title", + "paper_url", + "repository", + "access_status", + "local_path" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "sources", + "method_note", + "acquired", + "not_acquired", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/provenance_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "provenance_report": { + "$ref": "#/$defs/provenance_report" + } + }, + "required": [ + "provenance_report", + "artifacts" + ], + "title": "provenance_synthesis", + "type": 
"object" +} diff --git a/skills/research-step/assets/compiled/reproduction_synthesis.schema.json b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json new file mode 100644 index 0000000..570e076 --- /dev/null +++ b/skills/research-step/assets/compiled/reproduction_synthesis.schema.json @@ -0,0 +1,253 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "reproduction_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "laws_ledger": { + "items": { + "additionalProperties": true, + "properties": { + 
"effect_size_observed": { + "type": "string" + }, + "effect_size_source": { + "type": "string" + }, + "evidence": { + "type": "string" + }, + "independence_axes": { + "items": { + "enum": [ + "region", + "instrument", + "method", + "construct", + "temporal", + "population" + ] + }, + "type": "array" + }, + "law_id": { + "type": "string" + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "statement": { + "type": "string" + }, + "testability": { + "enum": [ + "tested", + "proxy_only", + "untestable" + ] + } + }, + "required": [ + "law_id", + "statement", + "outcome", + "testability", + "effect_size_source", + "effect_size_observed", + "independence_axes", + "evidence" + ], + "type": "object" + }, + "type": "array" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "method_note": { + "type": "string" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + }, + "what_failed_or_untestable": { + "items": { + "type": "string" + }, + "type": "array" + }, + "what_held": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "report_path", + "title", + "headline", + "method_note", + "laws_ledger", + "what_held", + "what_failed_or_untestable", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/reproduction_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "reproduction_report": { + "$ref": "#/$defs/reproduction_report" + } + }, + "required": [ + "reproduction_report", + "artifacts" + ], + "title": "reproduction_synthesis", + "type": "object" +} diff --git 
a/skills/research-step/assets/compiled/testability_triage.schema.json b/skills/research-step/assets/compiled/testability_triage.schema.json new file mode 100644 index 0000000..8968920 --- /dev/null +++ b/skills/research-step/assets/compiled/testability_triage.schema.json @@ -0,0 +1,144 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "testability_triage": { + "additionalProperties": true, + "properties": { + "assessments": { + "items": { + "additionalProperties": true, + "properties": { + "available_data": { + "type": "string" + }, + "gap": { + "type": "string" + }, + "proposed_test": { + "additionalProperties": true, + "properties": { + "metric": { + "type": "string" + }, + "success_threshold": { + "type": "string" + }, + "test": { + "type": "string" + } + }, + "required": [ + "test", + "metric", + "success_threshold" + ], + "type": "object" + }, + "required_data": { + "type": "string" + }, + "testable_now": { + "type": "boolean" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "testable_now", + "available_data", + "required_data", + "proposed_test", + "gap" + ], + "type": "object" + }, + "type": "array" + }, + "testable_theory_ids": { + "items": { + "type": "string" + }, + "type": "array" + } + 
}, + "required": [ + "assessments", + "testable_theory_ids" + ], + "type": "object" + } + }, + "$id": "asta-research-step/testability_triage.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "testability_triage": { + "$ref": "#/$defs/testability_triage" + } + }, + "required": [ + "testability_triage", + "artifacts" + ], + "title": "testability_triage", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/theory_formation.schema.json b/skills/research-step/assets/compiled/theory_formation.schema.json new file mode 100644 index 0000000..7373cec --- /dev/null +++ b/skills/research-step/assets/compiled/theory_formation.schema.json @@ -0,0 +1,240 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory": { + "additionalProperties": true, + "properties": { + "components": { + "additionalProperties": true, + "properties": { + "generation_objective": { + "type": "string" + }, + "new_predictions_likely": { + "items": { + "type": "string" + }, + "type": "array" + }, + "new_predictions_unknown": { + "items": { + "type": "string" + }, + "type": "array" + }, + 
"theory_statements": { + "items": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "statement_name": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + }, + "theory_statement": { + "type": "string" + } + }, + "required": [ + "statement_name", + "theory_statement", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "type": "array" + }, + "unaccounted_for": { + "items": { + "additionalProperties": true, + "properties": { + "text": { + "type": "string" + }, + "uuids": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "text", + "uuids" + ], + "type": "object" + }, + "type": "array" + } + }, + "required": [ + "generation_objective", + "theory_statements", + "new_predictions_likely", + "new_predictions_unknown", + "unaccounted_for" + ], + "type": "object" + }, + "description": { + "type": "string" + }, + "grounds_law_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "objective": { + "enum": [ + "accuracy_focused", + "novelty_focused" + ] + }, + "supporting_evidence_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "theory_query": { + "type": "string" + } + }, + "required": [ + "id", + "name", + "description", + "theory_query", + "objective", + "grounds_law_ids", + "supporting_evidence_ids", + "components" + ], + "type": "object" + } + }, + "$id": 
"asta-research-step/theory_formation.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "theories": { + "items": { + "$ref": "#/$defs/theory" + }, + "type": "array" + } + }, + "required": [ + "theories", + "artifacts" + ], + "title": "theory_formation", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/theory_synthesis.schema.json b/skills/research-step/assets/compiled/theory_synthesis.schema.json new file mode 100644 index 0000000..dd2768e --- /dev/null +++ b/skills/research-step/assets/compiled/theory_synthesis.schema.json @@ -0,0 +1,280 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + }, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "theory_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": 
"string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "mechanism": { + "additionalProperties": true, + "properties": { + "conflicting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + }, + "grounded_in": { + "items": { + "type": "string" + }, + "type": "array" + }, + "statement": { + "type": "string" + }, + "supporting_evidence": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "statement", + "grounded_in", + "supporting_evidence", + "conflicting_evidence" + ], + "type": "object" + }, + "new_predictions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "novelty_summary": { + "type": "string" + }, + "open_threads": { + "items": { + "type": "string" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "theories": { + "items": { + "additionalProperties": true, + "properties": { + "grounds_law_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "name": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "objective": { + "enum": [ + "accuracy_focused", + "novelty_focused" + ] + }, + "one_line": { + "type": "string" + }, + "supporting_evidence_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "testable_now": { + "type": "boolean" + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "name", + "objective", + "one_line", + "grounds_law_ids", + "novelty", 
+ "testable_now", + "supporting_evidence_ids" + ], + "type": "object" + }, + "type": "array" + }, + "title": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "mechanism", + "theories", + "novelty_summary", + "new_predictions", + "open_threads", + "figures", + "gaps", + "links" + ], + "type": "object" + } + }, + "$id": "asta-research-step/theory_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "theory_report": { + "$ref": "#/$defs/theory_report" + } + }, + "required": [ + "theory_report", + "artifacts" + ], + "title": "theory_synthesis", + "type": "object" +} diff --git a/skills/research-step/assets/compiled/verification_synthesis.schema.json b/skills/research-step/assets/compiled/verification_synthesis.schema.json new file mode 100644 index 0000000..8d1a639 --- /dev/null +++ b/skills/research-step/assets/compiled/verification_synthesis.schema.json @@ -0,0 +1,232 @@ +{ + "$comment": "generated by scripts/compile-schemas.py from assets/schemas.yaml; do not edit", + "$defs": { + "artifact": { + "additionalProperties": true, + "properties": { + "artifactId": { + "type": "string" + }, + "description": { + "type": "string" + }, + "extensions": { + "items": { + "type": "string" + }, + "type": "array" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "parts": { + "items": { + "$ref": "#/$defs/part" + }, + "type": "array" + } + }, + "required": [ + "artifactId", + "name", + "description", + "parts" + ], + "type": "object" + }, + "figure": { + "additionalProperties": true, + "properties": { + "caption": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "caption", + "image" + ], + "type": "object" + }, + "part": { + "additionalProperties": true, + "properties": { + "kind": { + "type": "string" + 
}, + "metadata": { + "type": "object" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "verification_report": { + "additionalProperties": true, + "properties": { + "figures": { + "items": { + "$ref": "#/$defs/figure" + }, + "type": "array" + }, + "gaps": { + "items": { + "additionalProperties": true, + "properties": { + "blocks": { + "type": "string" + }, + "item": { + "type": "string" + }, + "missing_data": { + "type": "string" + }, + "severity": { + "enum": [ + "high", + "medium", + "low" + ] + } + }, + "required": [ + "item", + "missing_data", + "blocks", + "severity" + ], + "type": "object" + }, + "type": "array" + }, + "headline": { + "type": "string" + }, + "links": { + "items": { + "additionalProperties": true, + "properties": { + "label": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": [ + "label", + "ref" + ], + "type": "object" + }, + "type": "array" + }, + "novelty_by_verification": { + "items": { + "additionalProperties": true, + "properties": { + "audit_survived": { + "type": "boolean" + }, + "claim": { + "type": "string" + }, + "data_used": { + "type": "string" + }, + "effect_size": { + "type": "string" + }, + "novelty": { + "enum": [ + "established", + "derivable", + "genuinely_new" + ] + }, + "outcome": { + "enum": [ + "held", + "partial", + "failed", + "underpowered", + "n/a" + ] + }, + "theory_id": { + "type": "string" + } + }, + "required": [ + "theory_id", + "claim", + "novelty", + "outcome", + "effect_size", + "data_used", + "audit_survived" + ], + "type": "object" + }, + "type": "array" + }, + "report_path": { + "type": "string" + }, + "title": { + "type": "string" + }, + "what_could_not_be_tested": { + "items": { + "type": "string" + }, + "type": "array" + }, + "what_was_tested": { + "type": "string" + } + }, + "required": [ + "report_path", + "title", + "headline", + "novelty_by_verification", + "what_was_tested", + "what_could_not_be_tested", + "figures", + "gaps", + "links" + ], + "type": 
"object" + } + }, + "$id": "asta-research-step/verification_synthesis.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/artifact" + }, + "type": "array" + }, + "verification_report": { + "$ref": "#/$defs/verification_report" + } + }, + "required": [ + "verification_report", + "artifacts" + ], + "title": "verification_synthesis", + "type": "object" +} diff --git a/skills/research-step/scripts/close-task.sh b/skills/research-step/scripts/close-task.sh new file mode 100755 index 0000000..7535a38 --- /dev/null +++ b/skills/research-step/scripts/close-task.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# close-task.sh <issue-id> <output-json> <output-markdown> +# Publish a task's output and finish it: write output_json + output_markdown into the issue +# metadata, validate output_json against the schema, close the issue, assert it closed, then +# close any ancestor group whose last child just closed. +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -eq 3 ]] || { echo "usage: close-task.sh <issue-id> <output-json> <output-markdown>" >&2; exit 1; } +id="$1"; oj="$2"; om="$3" +[[ -f "$oj" ]] || { echo "close-task: no output-json $oj" >&2; exit 1; } +[[ -f "$om" ]] || { echo "close-task: no output-markdown $om" >&2; exit 1; } +jq -e . "$oj" >/dev/null 2>&1 || { echo "close-task: $oj is not valid JSON" >&2; exit 1; } + +# 1. publish: merge output_json + output_markdown into the existing research_step metadata +cur="$(bd show "$id" --json | jq -c '.[0].metadata')" +merged="$(jq -c --slurpfile oj "$oj" --rawfile om "$om" \ + '.research_step.output_json = $oj[0] | .research_step.output_markdown = $om' <<<"$cur")" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$merged" > "$tmp" +bd update "$id" --metadata @"$tmp" >/dev/null + +# 2. 
validate structurally (reads the issue back; no style lint) +bash "$here/validate-output.sh" "$id" + +# 3. close and 4. assert closure +bd close "$id" >/dev/null +[[ "$(bd show "$id" --json | jq -r '.[0].status')" == "closed" ]] \ + || { echo "close-task: $id did not close" >&2; exit 2; } +echo "closed $id" + +# 5. cascade: close each ancestor group whose direct children are all closed. +# The epic root is never closed here — "root open, no open tasks" is the +# session-complete state that epic-root.sh and the workflows rely on. +cur_id="$id" +while [[ "$cur_id" == *.* ]]; do + parent="${cur_id%.*}" + parent_json="$(bd show "$parent" --json 2>/dev/null)" || break + [[ "$(jq -r '.[0].metadata.research_step.epic_root // false' <<<"$parent_json")" == "true" ]] && break + open_kids="$(bd list --json --limit 0 | jq --arg p "$parent" ' + [ .[] + | select(.id | startswith($p + ".")) + | select((.id[($p|length)+1:] | contains(".")) | not) + | select(.status != "closed") ] | length')" + [[ "$open_kids" -eq 0 ]] || break + if bd close "$parent" >/dev/null 2>&1; then + echo "closed group $parent" + else + echo "close-task: warning: could not close group $parent (task $id is closed; close the group manually)" >&2 + break + fi + cur_id="$parent" +done diff --git a/skills/research-step/scripts/create-task.sh b/skills/research-step/scripts/create-task.sh new file mode 100755 index 0000000..1e992a9 --- /dev/null +++ b/skills/research-step/scripts/create-task.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# create-task.sh <parent-id> <task_type> <flow> <title> <brief-description> [input-id ...] +# Create a leaf task issue under <parent-id>: hierarchical id, a brief one-line description, +# and initialized research_step metadata. output_json / output_markdown stay null until +# execute publishes them via close-task.sh. Prints the new issue id. 
+set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +[[ $# -ge 5 ]] || { echo "usage: create-task.sh <parent-id> <task_type> <flow> <title> <brief-desc> [input-id ...]" >&2; exit 1; } +parent="$1"; task_type="$2"; flow="$3"; title="$4"; desc="$5"; shift 5 + +# Validate the task_type against schemas.yaml. The helper exits 3 for an +# unknown task_type (and prints the known ones) or 5 when the schema cannot +# be read (e.g. PyYAML missing — run init); set -e propagates either. +"$here/task-output-keys.sh" "$task_type" >/dev/null + +[[ -n "$desc" ]] || { echo "create-task: a brief description is required" >&2; exit 4; } +[[ "$desc" != *$'\n'* ]] || { echo "create-task: description must be one line" >&2; exit 4; } +[[ "${#desc}" -le 200 ]] || { echo "create-task: description too long (${#desc} chars > 200) — keep it brief" >&2; exit 4; } + +if [[ $# -eq 0 ]]; then inputs_json="[]"; else inputs_json="$(printf '%s\n' "$@" | jq -R . | jq -cs .)"; fi +meta="$(jq -nc --arg f "$flow" --arg tt "$task_type" --argjson inp "$inputs_json" \ + '{research_step: {flow: $f, task_type: $tt, inputs: $inp, output_schema_version: 2, output_json: null, output_markdown: null}}')" +tmp="$(mktemp)"; trap 'rm -f "$tmp"' EXIT +printf '%s' "$meta" > "$tmp" +bd create "$title" --parent "$parent" -d "$desc" --metadata @"$tmp" --silent diff --git a/skills/research-step/scripts/next-task.sh b/skills/research-step/scripts/next-task.sh new file mode 100755 index 0000000..97e3592 --- /dev/null +++ b/skills/research-step/scripts/next-task.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# next-task.sh — the single definition of task ordering. Prints the open task +# issues (status == open, metadata.research_step.task_type set), sorted +# *numerically* by hierarchical id (wf.1.2 before wf.1.10 — a plain lexical +# sort would get this wrong past 9 siblings). Groups (no task_type) are never +# listed; there are no dependency edges, so this order is the ordering signal. 
+# +# Used by execute (pick the next task) and update-summary (render the queue), +# so the two never disagree about what runs next. +# +# Output (stdout, key: value lines): +# next: <bd-id> | none +# queue: <space-separated bd-ids> (omitted when empty) +# Exit: 0 (even when next: none) · 3 bd/jq missing +set -euo pipefail + +command -v bd >/dev/null 2>&1 || { echo "next-task: 'bd' not found on PATH" >&2; exit 3; } +command -v jq >/dev/null 2>&1 || { echo "next-task: 'jq' not found on PATH" >&2; exit 3; } + +ids="$(bd list --json --limit 0 | jq -r ' + [ .[] + | select(.status == "open") + | select(.metadata.research_step.task_type != null) ] + | sort_by(.id | split(".") | map(tonumber? // .)) + | .[].id')" + +if [[ -z "$ids" ]]; then + echo "next: none" + exit 0 +fi + +echo "next: $(head -n1 <<<"$ids")" +rest="$(tail -n +2 <<<"$ids" | tr '\n' ' ' | sed 's/ $//')" +[[ -n "$rest" ]] && echo "queue: $rest" || true diff --git a/skills/research-step/scripts/task-output-keys.sh b/skills/research-step/scripts/task-output-keys.sh new file mode 100755 index 0000000..ef1269b --- /dev/null +++ b/skills/research-step/scripts/task-output-keys.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# task-output-keys.sh <task_type> — print the space-separated output keys for a +# task from assets/schemas.yaml. The single schema reader for scripts: +# create-task.sh uses it to validate a task_type, validate-output.sh to get the +# expected output_json keys. 
+# Exit: 0 ok · 1 usage · 3 unknown task_type · 5 cannot read schema +# (python3/PyYAML missing or schemas.yaml unreadable — run init) +set -euo pipefail +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +schemas="$here/../assets/schemas.yaml" + +[[ $# -eq 1 ]] || { echo "usage: task-output-keys.sh <task_type>" >&2; exit 1; } + +python3 - "$schemas" "$1" <<'PY' +import sys + +try: + import yaml +except ImportError: + print("task-output-keys: python3 cannot import yaml (PyYAML) - run the init workflow", file=sys.stderr) + sys.exit(5) + +try: + with open(sys.argv[1]) as f: + d = yaml.safe_load(f) +except Exception as e: + print(f"task-output-keys: cannot read {sys.argv[1]}: {e}", file=sys.stderr) + sys.exit(5) + +tasks = d.get("tasks") or {} +t = tasks.get(sys.argv[2]) +if t is None: + print(f"task-output-keys: unknown task_type '{sys.argv[2]}'", file=sys.stderr) + print(f"task-output-keys: known: {' '.join(sorted(tasks))}", file=sys.stderr) + sys.exit(3) +print(" ".join(t["output"])) +PY diff --git a/src/asta/analyze_data/poll.py b/src/asta/analyze_data/poll.py index f2e0af5..9372a71 100644 --- a/src/asta/analyze_data/poll.py +++ b/src/asta/analyze_data/poll.py @@ -4,29 +4,25 @@ analyze-data skill body. Status ticks go to stderr; the final Task JSON goes to ``--output`` (or stdout) so the harness's background-task log shows progress without polluting the captured payload. + +Delegates the actual polling + rendering to the shared +``asta_agent.a2a.commands._poll_until_terminal``, so step-progress +events, parent/child indent, elapsed times, and artifact lines all +surface here — the previous bespoke loop only emitted ``state=...`` +ticks (and one per poll, even when nothing changed). 
""" from __future__ import annotations -import time -from datetime import datetime +import json import click -from a2a.types import Task, TaskState -from asta_agent.a2a.client import A2AClient, A2AError +from asta_agent.a2a.client import A2AClient +from asta_agent.a2a.commands import _poll_until_terminal from asta.analyze_data._url import dv_url from asta.utils.auth_helper import get_access_token -_TERMINAL_STATES = { - TaskState.completed, - TaskState.failed, - TaskState.input_required, - TaskState.canceled, - TaskState.rejected, - TaskState.auth_required, -} - @click.command() @click.argument("task_id") @@ -39,44 +35,23 @@ ) @click.option( "--interval", - default=60, - show_default=True, + default=None, type=click.IntRange(min=1), - help="Seconds between polls.", + help="Seconds between polls. Omit for the SDK's adaptive cadence " + "(5×6 then 15×20 then 60s).", ) -def poll(task_id: str, output: str | None, interval: int) -> None: +def poll(task_id: str, output: str | None, interval: int | None) -> None: """Poll TASK_ID until it reaches a terminal state, then emit the final Task JSON. Terminal states: completed, failed, input-required, canceled, rejected, auth-required. - Status ticks ([HH:MM:SS] state=...) are written to stderr; transient errors - are logged and retried. + Progress lines ([HH:MM:SS] state=…, step labels, artifacts) go to stderr; + the final Task JSON goes to --output (or stdout). 
""" client = A2AClient(dv_url(), api_key=get_access_token()) - - while True: - ts = datetime.now().strftime("%H:%M:%S") - try: - result = client.get_task(task_id) - parsed = Task.model_validate(result) - except A2AError as e: - click.echo(f"[{ts}] error: {e.code} {e}", err=True) - time.sleep(interval) - continue - except Exception as e: - click.echo(f"[{ts}] error: {e}", err=True) - time.sleep(interval) - continue - - state = parsed.status.state - click.echo(f"[{ts}] state={state.value}", err=True) - - if state in _TERMINAL_STATES: - payload = parsed.model_dump_json(by_alias=True, indent=2, exclude_none=True) - if output: - with open(output, "w") as f: - f.write(payload) - else: - click.echo(payload) - return - - time.sleep(interval) + final = _poll_until_terminal(client, task_id, interval=interval) + payload = json.dumps(final, indent=2) + if output: + with open(output, "w") as f: + f.write(payload) + else: + click.echo(payload) diff --git a/src/asta/auto_exp_designer.py b/src/asta/auto_exp_designer.py new file mode 100644 index 0000000..21b35ce --- /dev/null +++ b/src/asta/auto_exp_designer.py @@ -0,0 +1,20 @@ +from asta_agent.a2a.commands import make_a2a_group + +from asta.utils.auth_helper import get_access_token +from asta.utils.config import get_api_config + + +def _auto_exp_designer_url() -> str: + return get_api_config("auto-exp-designer")["base_url"] + + +auto_exp_designer = make_a2a_group( + name="auto-exp-designer", + url_factory=_auto_exp_designer_url, + token_factory=get_access_token, + help=( + "Design computational experiments via the Auto Experiment Designer agent.\n\n" + "Subcommands talk to the agent through asta-gateway. Auth comes from\n" + "`asta auth login`." 
+ ), +) diff --git a/src/asta/cli.py b/src/asta/cli.py index 6e040e4..ca69e85 100644 --- a/src/asta/cli.py +++ b/src/asta/cli.py @@ -5,10 +5,12 @@ from asta import __version__ from asta.analyze_data import analyze_data +from asta.auto_exp_designer import auto_exp_designer from asta.autodiscovery.commands import autodiscovery from asta.commands.auth import auth from asta.documents import documents from asta.experiment import experiment +from asta.flows import flows from asta.literature.find import find from asta.literature.interactive import interactive from asta.papers.author import author @@ -51,6 +53,9 @@ def papers(): # Register analyze-data commands cli.add_command(analyze_data) +# Register auto-exp-designer commands +cli.add_command(auto_exp_designer) + # Register artifacts command cli.add_command(artifacts, name="artifacts") @@ -59,6 +64,7 @@ def papers(): cli.add_command(experiment) cli.add_command(pdf_extraction) cli.add_command(autodiscovery) +cli.add_command(flows) # Register literature subcommands literature.add_command(find) diff --git a/src/asta/flows/__init__.py b/src/asta/flows/__init__.py new file mode 100644 index 0000000..3159a79 --- /dev/null +++ b/src/asta/flows/__init__.py @@ -0,0 +1,5 @@ +"""Flows subcommand - pass-through to asta-flows CLI""" + +from .passthrough import flows + +__all__ = ["flows"] diff --git a/src/asta/flows/passthrough.py b/src/asta/flows/passthrough.py new file mode 100644 index 0000000..5d565b5 --- /dev/null +++ b/src/asta/flows/passthrough.py @@ -0,0 +1,15 @@ +"""Pass-through command for asta-flows CLI""" + +from asta.utils.config import get_config +from asta.utils.passthrough import create_passthrough_command + +config = get_config()["passthrough"]["flows"] + +flows = create_passthrough_command( + tool_name=config["tool_name"], + install_type=config["install_type"], + install_source=config["install_source"], + minimum_version=config["minimum_version"], + command_name=config["command_name"], + 
docstring=config["docstring"], +) diff --git a/src/asta/utils/asta.conf b/src/asta/utils/asta.conf index fecb174..eae6da4 100644 --- a/src/asta/utils/asta.conf +++ b/src/asta/utils/asta.conf @@ -65,6 +65,11 @@ apis { base_url = ${auth.gateway_url}"/api/analyze-data" base_url = ${?ASTA_ANALYZE_DATA_URL} } + + # Auto Experiment Designer A2A agent + auto-exp-designer { + base_url = ${auth.gateway_url}"/api/auto-exp-designer" + } } # Passthrough command configurations @@ -107,4 +112,13 @@ passthrough { docstring = "Extract text from PDFs using olmOCR" } + flows { + tool_name = "asta-flows" + install_type = "local" + install_source = "~/workspace/asta-flows" + minimum_version = "0.1.0" + command_name = "flows" + docstring = "Live web UI for research-step runs" + } + }