ChicagoHAI · bimu233 · Apr 25, 2026 · Apr 26, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/src/core/runner.py b/src/core/runner.py
@@ -300,6 +300,16 @@ def run_research(self, idea_id: str,
         if use_scribe:
             (work_dir / "notebooks").mkdir(parents=True, exist_ok=True)
 
+        # Initialize STATE.md from template if not already present.
+        # The agent reads and updates this file; the pipeline owns creation so
+        # the template structure (italic description lines) is always correct.
+        state_md = work_dir / "STATE.md"
+        if not state_md.exists():
+            state_template = self.project_root / "templates" / "base" / "deliverables" / "state_template.md"
+            if state_template.exists():
+                state_md.write_text(state_template.read_text(encoding='utf-8'), encoding='utf-8')
+                print(f"   Initialized STATE.md")
+
         # Copy helper scripts to workspace
         self._copy_workspace_resources(work_dir)
 

diff --git a/templates/base/deliverables/state_template.md b/templates/base/deliverables/state_template.md
@@ -0,0 +1,55 @@
+# Session Title
+_A descriptive 5-10 word title summarizing the research task. Update it in each phase._
+
+# Current State
+_Active phase number and name. What is immediately pending. The single most important next action.
+Write a Phase Status table (all 7 phases, each marked DONE / IN PROGRESS / pending), then the active action.
+Update this section whenever the phase status or active action changes._
+
+Phase 0 — Motivation:     pending
+Phase 1 — Planning:       pending
+Phase 2 — Setup:          pending
+Phase 3 — Implementation: pending
+Phase 4 — Analysis:       pending
+Phase 5 — Documentation:  pending
+Phase 6 — Validation:     pending
+
+Active: Phase 0 — Motivation: [next action here]
+
+# Worklog
+_One terse line per significant action. Phase transitions explicitly noted.
+Format: [Phase N — Name] action taken — outcome._
+
+
+# Research Specification
+_Hypothesis being tested. Datasets used (name + absolute path). Evaluation metrics and how they are computed. Key constraints.
+Written in Phase 1. NEVER overwrite this content — only append corrections below it._
+
+
+# Files and Resources
+_Important files with absolute paths. Datasets: name, location, size. Key scripts and what they do.
+Model checkpoints. Configuration files. Output directories._
+
+# Workflow
+_Final successful reproduction commands only: environment activation command, then the exact run command.
+Do not list failed attempts here — those belong in # Experiment Attempts._
+
+# Experiment Design
+_Baselines chosen and justification. Evaluation metrics and how they are computed.
+Hyperparameters, random seeds, train/val/test splits. Architecture decisions and rationale._
+
+# Experiment Attempts
+_One entry per run. Write the entry BEFORE the run (Status: RUNNING) and update its status AFTER the run (Status: FAILED or SUCCESS).
+Never delete past entries, and never change anything in them except the status — otherwise append only._
+
+# Experiment Results
+_Exact numerical results: metric name, value, standard deviation, conditions. Complete comparison tables.
+Record actual numbers, not prose summaries._
+
+# Learnings
+_What worked and why. What did not work and why. Surprising findings. Domain insights.
+Do not duplicate content already recorded in other sections._
+
+
+
+