diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..2b22236
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,47 @@
+name: CI
+
+on:  # events that start this workflow
+  push:
+    branches: [ main ]  # pushes to main only
+  pull_request:  # no branch filter is given here
+  workflow_dispatch:  # manual trigger; reproduce-analysis is gated on this event
+
+jobs:
+  lint:  # has no job-level `if:`, so it runs for every trigger of this workflow
+    name: ruff (lint)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps the version a string
+      - run: pip install ruff  # no version pin: installs whichever ruff release is current
+      - run: ruff check .  # lint rules only; `ruff format --check .` is not run by this job
+
+  reproduce-analysis:
+    name: no-sim failure analysis (reproducibility smoke)
+    runs-on: ubuntu-latest
+    # ACT inference on a CPU runner is heavy; run on demand rather than on every push.
+    if: github.event_name == 'workflow_dispatch'
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"  # quoted so YAML keeps the version a string
+      - name: Install (lerobot + hub extras)
+        run: pip install -e ".[lerobot,hub]" "lerobot==0.5.2"  # the no-sim analysis requires lerobot 0.5.2
+      - name: Fetch minimal assets from the Hugging Face Hub
+        run: bash scripts/bootstrap_assets.sh --minimal
+      - name: Reproduce the analysis (no simulator)  # the committed summary is removed first so the check below cannot pass on stale output
+        run: rm -f experiments/act_push_failure/results/push_summary.json && bash experiments/act_push_failure/run_all.sh
+      - name: Assert the wrist-shortcut diagnostic holds
+        run: |
+          python - <<'PY'
+          import json
+          with open("experiments/act_push_failure/results/push_summary.json", encoding="utf-8") as fh:
+              a = json.load(fh)["E2_camera_ablation"]
+          wrist, overhead = a["delta_black_wrist_only"], a["delta_black_overhead_only"]
+          print(f"black-wrist Δ={wrist:.3f}  black-overhead Δ={overhead:.3f}")
+          assert wrist > overhead, "expected push to rely on the wrist camera (wrist Δ > overhead Δ)"
+          print("OK: push policy is wrist-reliant, as diagnosed.")
+          PY
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000..cdd4b6d
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,28 @@
+cff-version: 1.2.0  # Citation File Format schema version this file follows
+message: "If you use sim2act in your work, please cite it."
+title: "sim2act: a VLA simulation data engine"
+abstract: >-
+  An end-to-end NVIDIA Isaac Lab pipeline for collecting multimodal Franka manipulation
+  demonstrations (a Warp state-machine oracle and a PPO RL teacher), converting them to the
+  LeRobot v3.0 format, training Action Chunking Transformer (ACT) policies, and evaluating them
+  closed-loop — together with a reproducible, simulator-free failure-analysis case study of a
+  camera-reliance shortcut in an imitation policy.
+type: software
+authors:
+  - family-names: Ma
+    given-names: Kevin
+license: Apache-2.0  # SPDX identifier; the repository LICENSE file is the Apache License 2.0
+repository-code: "https://github.com/Kevinma0215/sim2act"
+url: "https://github.com/Kevinma0215/sim2act"  # same location as repository-code
+version: "0.1.0"  # quoted so YAML keeps it a string
+date-released: "2026-06-19"  # YYYY-MM-DD, quoted so YAML keeps it a string
+keywords:
+  - vision-language-action
+  - imitation-learning
+  - robot-learning
+  - action-chunking-transformer
+  - isaac-lab
+  - lerobot
+  - sim-to-real
+  - domain-randomization
+  - covariate-shift
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..47df74f
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,77 @@
+# CLAUDE.md — sim2act
+
+> Working notes for AI agents on this repo. Read before editing or running anything.
+> This file is the single source of truth for commands, conventions, and verified numbers —
+> prefer it over re-deriving facts from the code.
+
+## What this is
+
+**sim2act** — a VLA (vision-language-action) **simulation data engine** on NVIDIA Isaac Lab. A Franka
+arm performs pick / barrier / push; the repo takes each task from privileged oracle → multimodal demo
+collection → LeRobot v3.0 → ACT imitation learning → a closed-loop eval harness. Positioning is
+three-in-one: **data engine** (umbrella) + **rigorous failure diagnosis** + **end-to-end closed loop**.
+Audience: foundation-model robotics engineers.
+
+Origin: built as an **R&S (Robotics & Simulation) take-home challenge**, now generalized into a public
+project. **R&S = Robotics & Simulation — NOT "Rohde & Schwarz".** The old submission codename was
+"Corvinus" and is being retired (only `docs/archive/` may still mention it).
+
+## Conda environments (CRITICAL — there are two; do not mix them)
+
+Isaac Lab is pip-installed into the `isaaclab` conda env, so run stages with **plain `python`**
+(NOT `./isaaclab.sh -p`, despite older README text). Prefer `conda run -n <env> python ...`.
+
+| env | key versions | use for |
+|---|---|---|
+| **isaaclab** (py3.11) | isaacsim 5.1.0, isaaclab 0.54.4, lerobot **0.4.4**, torch 2.7, warp 1.14 | everything touching the simulator: collect, PPO RL train, LeRobot convert, eval — and the push-fix pipeline's ACT training (`fix_push_widen_dr.sh` runs end-to-end in this env) |
+| **lerobot** (py3.12) | lerobot **0.5.2**, torch 2.11, no Isaac Sim | the **no-sim** failure analysis `experiments/act_push_failure/run_all.sh` (requires 0.5.2) |
+
+Gotcha: the two envs ship different lerobot versions (0.4.4 vs 0.5.2); existing ACT checkpoints load
+under both. Keep each pipeline inside ONE env: run the whole `fix_push_widen_dr.sh` in `isaaclab`;
+run the no-sim ablation in `lerobot`.
+
+Hardware here: 1× RTX 5060 Ti (16 GB). PPO default is 4096 envs — may need fewer on 16 GB; SPEEDRUN uses 256.
+
+## Canonical commands (run from repo root)
+
+- Editable install: `conda run -n isaaclab python -m pip install -e .`
+- Collect (SM oracle): `python scripts/collect/demos.py --task pick_place|barrier --num_demos 50 --headless --enable_cameras`
+- Push RL chain: `python scripts/train/push_rl.py --headless --num_envs 4096` → `python scripts/rl/export_push.py --headless` → `python scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras`
+- Convert → LeRobot: `python data/convert_to_lerobot.py --input _out/datasets/<tag>_official_demos/dataset.hdf5 --output _out/datasets/lerobot/<tag> --state_keys joint_pos,joint_vel --no_depth`
+- Train ACT: `python scripts/train/act.py --dataset _out/datasets/lerobot/<tag> --steps 40000 --batch-size 8 [--wandb]`
+- Eval: `python scripts/eval/policy.py --policy act|oracle|dummy --task <t> --model_path <ckpt>/pretrained_model --num_rollouts 20 --headless --enable_cameras` (extras: `--ablate_camera overhead|wrist`, `--n_action_steps`, `--init_scale`, `--oracle-pose gt|noisy`)
+- No-sim push failure analysis: `conda run -n lerobot bash experiments/act_push_failure/run_all.sh`
+- Push fix (whole chain): `conda run -n isaaclab bash scripts/fix_push_widen_dr.sh` (do a `SPEEDRUN=1` smoke run first). The DR (domain-randomization) range is set via `PUSH_BOX_DR` and is shared by train/collect/eval.
+
+## Outputs: everything generated lives under `_out/` (gitignored)
+
+`_out/datasets/{<tag>_official_demos/dataset.hdf5 (raw HDF5), lerobot/<tag> (LeRobot v3.0)}`,
+`_out/rl/franka_push/<ts>/`, `_out/act/act_<tag>_run_<ts>/checkpoints/{<step>,last}/pretrained_model`,
+`_out/eval/*.json`, `_out/viz/`.
+The one generated thing that IS committed: `experiments/act_push_failure/results/` (analysis evidence —
+deliberately gitignore-excepted).
+
+## Verified numbers (cite verbatim; do not re-derive)
+
+From `experiments/act_push_failure/results/*_summary.json`:
+- push teacher-forcing EE-xy L1 = **0.011 m** (proves the model learned the demos).
+- push camera ablation: black-**wrist** Δ = **0.197** vs black-**overhead** Δ = **0.038** → wrist shortcut.
+- barrier ablation: black-overhead Δ = **0.089** vs black-wrist Δ = **0.027** → robust overhead.
+- barrier ACT **90%** in-dist vs SM oracle **75%**; OOD at init_scale 1.5 → **55%**. push ACT **0%** (pre-fix).
+- Root cause: push init DR ±3 cm (vs barrier ±13/±7 cm) → static overhead uninformative → policy takes the
+  wrist shortcut → closed-loop covariate-shift spiral.
+
+## Don't
+
+- Don't commit `_out/` or large media (`.gif/.webm/.mp4`) into git history (host on HF Hub / GitHub releases).
+- Don't rename the Python modules (`envs`/`eval`/`data`/`controllers`) — only the distribution name is `sim2act`.
+- Don't call it "Rohde & Schwarz". R&S = Robotics & Simulation.
+- Don't advertise OpenVLA / Octo / π0 as done — `OpenVLAWrapper` is wired but unvalidated (an extension point).
+- Don't hardcode `/home/kevin786/...` — use the `BASH_SOURCE` repo-root pattern (see `scripts/*.sh`).
+- Don't launch the heavy push fix without a `SPEEDRUN=1` smoke first.
+
+## Layout
+
+`envs/` (base/tasks/scenes cfg) · `controllers/` (Warp GPU state machine) · `scripts/{collect,train,eval,rl,viz}`
+· `eval/` (VLA eval harness) · `data/convert_to_lerobot.py` · `tools/{checks,smoke,viz}` ·
+`experiments/act_push_failure/` (flagship failure analysis, no-sim) · `docs/` · `_out/` (generated, gitignored).
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..ef1f090
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,41 @@
+# Contributing to sim2act
+
+Thanks for your interest. sim2act is a research codebase; contributions that improve
+reproducibility, add tasks/policies, or sharpen the analysis are very welcome.
+
+## Environments
+
+Two conda envs are used (full matrix in [CLAUDE.md](CLAUDE.md)):
+
+- **`isaaclab`** — anything that touches the simulator: demo collection, PPO RL training, LeRobot
+  conversion, and closed-loop eval. Isaac Lab is pip-installed into this env, so run scripts with
+  plain `python` (not `./isaaclab.sh -p`).
+- **`lerobot`** — the simulator-free failure analysis (`lerobot==0.5.2`).
+
+Install the package editable:
+
+```bash
+conda run -n isaaclab python -m pip install -e ".[hub]"
+```
+
+## Sanity check without a GPU or simulator
+
+The flagship failure analysis reproduces in ~5 minutes from a published dataset + checkpoint, no
+Isaac Sim required:
+
+```bash
+conda activate lerobot
+bash scripts/bootstrap_assets.sh --minimal   # pulls the dataset + checkpoint from the HF Hub
+bash experiments/act_push_failure/run_all.sh
+```
+
+## Style
+
+- Python is linted with [ruff](https://docs.astral.sh/ruff/): `ruff check . && ruff format --check .`
+- Keep generated artifacts out of git — everything lands under `_out/` (gitignored).
+- Don't hardcode absolute paths; shell scripts resolve the repo root via `BASH_SOURCE`.
+
+## Pull requests
+
+Keep PRs focused and clearly described. If a change affects the pipeline, say which stage(s) and
+which conda env you validated it in. CI runs ruff on every PR; the no-simulator reproducibility smoke runs only when triggered manually (`workflow_dispatch`).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..53da2d8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2026 Kevin Ma
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
index 7510b12..4b43ea9 100644
--- a/README.md
+++ b/README.md
@@ -1,317 +1,199 @@
-# Isaac_sim — Franka VLA Pipeline（pick / barrier / push）
+# sim2act — a VLA simulation data engine
 
-基於 **IsaacLab** 建立的 Franka 操作示範收集與 VLA 訓練資料管線。
+[![CI](https://github.com/Kevinma0215/sim2act/actions/workflows/ci.yml/badge.svg)](https://github.com/Kevinma0215/sim2act/actions/workflows/ci.yml)
+[![License: Apache-2.0](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](pyproject.toml)
+[![Isaac Sim 5.1](https://img.shields.io/badge/Isaac%20Sim-5.1-76B900.svg)](https://developer.nvidia.com/isaac/sim)
+[![LeRobot v3.0](https://img.shields.io/badge/LeRobot-v3.0-orange.svg)](https://github.com/huggingface/lerobot)
 
-> **目標**：Env 設定 → 示範收集（SM oracle / RL oracle）→ LeRobot v3.0 格式 → ACT 訓練 → VLA Eval Harness
+**From simulation to action.** sim2act manufactures Vision-Language-Action (VLA) training data in
+NVIDIA Isaac Lab — and stress-tests the policies it produces.
 
----
+🇹🇼 中文版：[README.zh-Hant.md](README.zh-Hant.md)
 
-## Challenge 對應（R&S Round 2）
+<p align="center">
+  <img src="docs/images/act_barrier_success.gif" width="640" alt="ACT policy picking a block over a barrier"><br>
+  <em>A learned ACT policy clearing the barrier task — trained entirely from simulated oracle demos.</em>
+</p>
 
-本專案為 R&S Round-2 挑戰（`R&S Challenge.md`）的實作；書面報告見
-[`Project-Corvinus-Report.pdf`](Project-Corvinus-Report.pdf)。
+## TL;DR
 
-| Goal | 內容 | 本專案實作 | 狀態 |
-|------|------|-----------|:---:|
-| 1 | 桌上放一個 box | `scripts/viz/two_box_table.py`（`envs/scenes/two_box_table_env.py`）| ✅ |
-| 2 | 第二個 box | 同上（場景含兩顆 box）| ✅ |
-| 3 | 用 box1 把 box2 推向桌角 | push RL oracle（`envs/tasks/push/`，`scripts/train/push_rl.py`）| ✅ oracle；ACT 已診斷待修復 |
-| 4 | 1/3 臂高屏障，pick 越障到另一側 | barrier SM（`envs/tasks/barrier/`，調高 lift/approach）| ✅ |
-| 5 | 收集可實際使用的感測資料 | RGB-D + wrist RGB + joint-state + 指尖 contact（`envs/base/`）| ✅（IMU 經分析判為冗餘移除）|
+sim2act takes three contact-rich Franka manipulation tasks — pick, pick-over-barrier, and
+non-prehensile push — through a complete data flywheel:
 
----
+> **scene → privileged oracle (Warp GPU state machine + a 4096-env PPO teacher) → multimodal demo
+> collection → LeRobot v3.0 → ACT imitation learning → a closed-loop eval harness**
 
-## Pipeline 概覽
+Each stage is a single command and is fully reproducible. Two results carry the project:
 
-```
-[ pick_place / barrier ]                         [ push ]
-FrankaBaseEnvCfg + pick_place_cube_sm.py         PPO push teacher（RL oracle）
-(overhead + wrist camera, Warp SM)               scripts/train/push_rl.py → rl/export_push.py
-              │                                              │
-              ▼                                              ▼
-   scripts/collect/demos.py                        scripts/collect/push_rl_demos.py
-   (--task pick_place|barrier)                     (RL policy rollout)
-              │                                              │
-              └────────────────────┬─────────────────────────┘
-                                   ▼
-              _out/datasets/franka_<task>_demos/dataset.hdf5
-              (actions + obs + rgb_overhead/wrist + depth + contact)
-                                   │
-                                   ▼
-                  data/convert_to_lerobot.py → LeRobot v3.0（parquet + mp4）
-                                   │
-                      ┌────────────┴─────────────┐
-                      ▼                           ▼
-            scripts/train/act.py        eval/ + scripts/eval/policy.py
-            (ACT，IL)          ──────▶  (VLA rollout → _out/eval/eval_*.json)
-```
-
----
-
-## 目錄結構
+1. a student ACT policy that **beats its privileged oracle on the barrier task (90% vs 75%)**, with a
+   measured out-of-distribution generalization curve; and
+2. a **rigorous, simulator-free root-cause diagnosis of a 0%-success push policy** — traced to a
+   camera-shortcut induced by under-randomized initial states — plus the **before→after fix** that
+   targets it.
 
-```
-Isaac_sim/
-├── pyproject.toml                # editable 套件定義（移除 sys.path hack 的關鍵）
-│
-├── envs/                         # 環境 config（base / tasks / scenes 分層）
-│   ├── base/                      # 共用基底（繼承鏈，每層只負責一件事）
-│   │   ├── franka_base_env.py     #   FrankaSensorEnvCfg  (+ overhead RGB-D / wrist RGB)
-│   │   │                          #     └ FrankaPickVLAEvalEnvCfg  (評估分支，無 recorder)
-│   │   │                          #   FrankaBaseEnvCfg    (+ contact 感測 + RecorderManager)
-│   │   │                          #     └ FrankaPickVLAEnvCfg      (pick-and-place)
-│   │   └── recorders.py           #   PreStepStructuredObsRecorder → 結構化 obs/
-│   │                              #     joint_pos/vel/torque, object_pos/quat,
-│   │                              #     rgb_overhead/wrist, depth_overhead, contact_l/rfinger
-│   ├── tasks/                     # 各任務（繼承 base，只加自己的場景/事件）
-│   │   ├── push/                  #   push 走 RL oracle（SM push 已移除）
-│   │   │   ├── __init__.py        #   PUSH_CORNER 桌角目標常數（RL reward/eval 共用）
-│   │   │   ├── eval_env.py        #   FrankaPushVLAEvalEnvCfg（ACT eval）
-│   │   │   └── rl/                #   push RL oracle 子系統（PPO teacher）
-│   │   │       ├── action.py      #     PlanarPushActionCfg (x,y,yaw)
-│   │   │       ├── mdp.py         #     push 專用 obs/reward 函式
-│   │   │       ├── train_env.py   #     FrankaPushRLEnvCfg（4096 env，無相機；box DR 由 PUSH_BOX_DR 控制）
-│   │   │       ├── collect_env.py #     FrankaPushRLCollectEnvCfg（用 policy 收 demo）
-│   │   │       └── ppo_cfg.py     #     PushPPORunnerCfg（rsl_rl 超參）
-│   │   └── barrier/
-│   │       ├── collect_env.py     #   FrankaBarrierEnvCfg + apply_barrier_scene（Goal 4）
-│   │       └── eval_env.py        #   FrankaBarrierVLAEvalEnvCfg（ACT eval）
-│   └── scenes/
-│       └── two_box_table_env.py   #   TwoBoxTableSceneCfg：純場景（桌 + 兩方塊，無手臂，Goal 1/2）
-│
-├── controllers/                  # Warp GPU state machine（機器人無關 kernel）
-│   └── pick_place_cube_sm.py     #   PickAndPlaceSm：REST→…→GRASP→LIFT→…→RETREAT→DONE
-│                                  #   （pick + barrier 共用；push 走 RL，無 SM）
-│
-├── scripts/                      # pipeline entrypoints（依功能分子資料夾）
-│   ├── collect/                  #   收集 demo
-│   │   ├── demos.py              #     SM oracle（--task pick_place|barrier）→ HDF5
-│   │   └── push_rl_demos.py      #     RL oracle（push）→ HDF5
-│   ├── train/                    #   訓練
-│   │   ├── act.py                #     ACT（IL，wrap lerobot-train）
-│   │   └── push_rl.py            #     PPO（push RL teacher，rsl_rl）
-│   ├── eval/                     #   VLA 評估
-│   │   ├── policy.py            #      評估入口（--policy dummy|act|openvla|oracle）
-│   │   ├── oracle_noise_sweep.py #     oracle 位姿噪聲掃描（去 privilege 敏感度）
-│   │   └── record_demos.sh      #      一鍵錄製 in-dist / OOD / oracle 對照影片
-│   ├── rl/                       #   push RL 輔助
-│   │   ├── export_push.py        #     checkpoint → policy.pt（jit）
-│   │   └── play_push.py          #     播放/視覺化訓好的 policy（量成功率）
-│   ├── viz/                      #   場景視覺化 debug（不錄製）
-│   │   ├── scripted_pick_place.py
-│   │   └── two_box_table.py      #     two-box 場景（Goal 1/2）
-│   ├── fix_push_widen_dr.sh      #   push 修復 pipeline：加大 DR→重訓 PPO→重收→轉檔→重訓 ACT
-│   └── recollect_retrain_barrier.sh  # barrier 乾淨重收（無 marker）+ 重訓 ACT
-│
-├── tools/                        # 開發/驗證工具（依功能分子資料夾）
-│   ├── checks/                   #   健全性/資料檢查（env / hdf5_data / push_demos）
-│   ├── smoke/                    #   煙霧測試（planar_push / push_rl_env）
-│   ├── viz/                      #   分析/視覺化（多數純資料集、不需模擬器）
-│   │   ├── eval_ood.py          #      OOD 泛化報告（ACT vs oracle，分區成功率）
-│   │   ├── barrier_init_map.py  #      barrier 初始分佈 × 成功/失敗 俯視圖
-│   │   ├── push_camera_sensitivity.py  # ACT push 各相機敏感度（ablation bar）
-│   │   ├── push_training_curve.py #     PPO push teacher 訓練曲線（success / reward）
-│   │   └── pcd.py / push_demos.py #     點雲 / push demo 視覺化
-│   └── migrate_outputs.sh        #   一次性：舊輸出 → _out/（dry-run 預設）
-│
-├── eval/                         # VLA Eval Harness（可安裝套件）
-│   ├── vla_wrapper.py            #   VLAWrapper (ABC) / Dummy / ACTLocal / OpenVLA / SMOracle
-│   ├── obs_adapter.py            #   Isaac Lab scene state → VLA 輸入 dict（build_obs_dict）
-│   ├── eval_runner.py            #   EvalRunner：多環境 rollout + success latch
-│   └── video_recorder.py         #   三相機（overhead/wrist/side）逐 episode 錄影
-│
-├── data/                         # 資料格式轉換
-│   └── convert_to_lerobot.py     #   HDF5 → LeRobot v3.0（parquet + mp4）
-│
-├── experiments/                  # 非 MVP 探索（可復現分析，不需模擬器）
-│   └── act_push_failure/          #   ACT push 失效根因分析（相機依賴 ablation）
-│       ├── act_camera_ablation.py #     E1 teacher-forcing + E2 逐相機 ablation
-│       ├── run_all.sh             #     一鍵復現（push + barrier 對照 + 影像）
-│       ├── README.md              #     分析說明（實驗 ↔ 推論 ↔ 證據）
-│       ├── REPORT_SECTION.md      #     可直接貼進報告的段落草稿
-│       └── results/               #     證據（log / json / 訓練影像）
-│
-└── _out/                         # ★所有 generated 產物統一根（.gitignore）
-    ├── datasets/                 #   franka_<task>_demos/dataset.hdf5（raw）+ lerobot/（LeRobot v3.0）
-    ├── eval/                     #   eval_*.json 評估結果
-    ├── rl/                       #   rsl_rl 訓練 log + policy.pt / model_*.pt
-    ├── act/                      #   ACT checkpoints
-    ├── viz/                      #   demo gif/影片（含 demos/）
-    └── debug/                    #   相機截圖 / 點雲
-```
-> 舊散落輸出（`datasets/` `results/` `logs/` `outputs/` `viz_demos/` `debug_images/`）整合進 `_out/`；
-> 既有資料用 `bash tools/migrate_outputs.sh --run` 搬移（預設 dry-run 先預覽）。
+The diagnosis reproduces in **~5 minutes on a published dataset + checkpoint, no simulator required**.
 
-> Goal 5 感測決策（D4）：**IMU 已移除**（固定基座下，hand 的線加速度/角速度可由 joint-state
-> + FK 完全推得，屬冗餘）。保留的感測 = overhead RGB-D + wrist RGB + joint-state + gripper + 指尖 contact。
+The design optimizes for what foundation-model robotics actually weighs: data infrastructure at
+scale, empirical rigor, honest failure analysis, and reproducibility. The project started as a *Robotics & Sim*
+take-home challenge whose thesis was *"the challenge is not the quantity of the data, but the
+quality"* (provenance in [docs/archive/](docs/archive/)).
 
----
+## Headline results
 
-## 快速上手
+| Task | Oracle | Oracle SR | ACT (in-dist) | ACT (OOD ×1.5) | Learned camera | Status |
+|---|---|:---:|:---:|:---:|---|:---:|
+| `pick_place` | Warp state machine | — | — | — | overhead | demos ✅ |
+| `barrier` | Warp state machine | 75% | **90%** | 55% | overhead (robust) | ✅ |
+| `push` | PPO teacher (4096 envs, ~98.5% train) | — | **0%** → _(pending fix run)_ | — | wrist (fragile) → overhead | 🔬 diagnosed + fix in progress |
 
-### 前置需求
+The student **beats the teacher** on barrier (90% > 75%), and the push failure is diagnosed down to a
+single causal lever — initial-state randomization width — with controlled camera ablations. Full
+numbers and methodology: [docs/results.md](docs/results.md).
 
-1. Isaac Lab（需先完成安裝，使用 `./isaaclab.sh`）。
-2. **一次性：把本專案裝成 editable 套件**（這樣各 entrypoint 不需 `sys.path` hack 即可 `from envs...`）：
+## Architecture
 
-```bash
-./isaaclab.sh -p -m pip install -e .
-# 轉 LeRobot 的相依（通常在獨立 lerobot env）：./isaaclab.sh -p -m pip install -e ".[lerobot]"
-# 點雲視覺化：./isaaclab.sh -p -m pip install -e ".[viz]"
 ```
-
-### Step 1：收集示範
-
-```bash
-# 收集 50 個成功 demo（headless + 開相機）
-./isaaclab.sh -p scripts/collect/demos.py --task pick_place --num_demos 50 --headless --enable_cameras
-
-# 快速測試（只收 3 個）
-./isaaclab.sh -p scripts/collect/demos.py --num_demos 3 --headless --enable_cameras
+                    ┌──────────────── privileged oracle ────────────────┐
+   scene            │  Warp GPU state machine  (pick / barrier)         │
+  (Franka +  ─────▶ │  PPO RL teacher, 4096 envs  (push)                │
+   two boxes)       └───────────────────────┬───────────────────────────┘
+                                            ▼
+                          multimodal demo collection
+              overhead RGB-D + wrist RGB + joint pos/vel/torque
+                  + fingertip contact forces   ·   8-D action
+                                            │
+                                            ▼
+                  raw HDF5  ──▶  LeRobot v3.0  (parquet + mp4)
+                                            │
+                            ┌───────────────┴────────────────┐
+                            ▼                                ▼
+                ACT imitation learning          closed-loop eval harness
+                  (chunked actions)             VLAWrapper · success-latch
+                                                camera ablation · OOD sweep
 ```
 
-**輸出** → `_out/datasets/franka_pick_demos/dataset.hdf5`
+<p align="center">
+  <img src="docs/images/sm_pick_9.gif" width="265" alt="state-machine pick">
+  <img src="docs/images/rl_push_suc_16.gif" width="265" alt="PPO push teacher">
+  <img src="docs/images/act_barrier_success.gif" width="265" alt="ACT barrier success"><br>
+  <em>Left → right: Warp state-machine pick · PPO push teacher · learned ACT barrier policy.</em>
+</p>
 
-| 資料欄位 | Shape |
-|---------|-------|
-| `actions` | `(T, 8)` arm(7) + gripper(1) |
-| `obs/joint_pos` / `joint_vel` / `joint_torque` | `(T, 9)` |
-| `obs/object_pos` | `(T, 3)` env-local frame |
-| `obs/object_quat` | `(T, 4)` wxyz |
-| `obs/rgb_overhead` / `rgb_wrist` | `(T, 224, 224, 3)` uint8 |
-| `obs/depth_overhead` | `(T, 224, 224, 1)` float32 公尺 |
-| `obs/contact_lfinger` / `contact_rfinger` | `(T, 3)` 淨接觸力 (N) |
+- **Multimodal observation** per step: overhead RGB-D (224²) + wrist RGB (224²) + joint
+  position/velocity/torque + dual fingertip contact forces. **Action:** 8-D (7-D IK-absolute
+  end-effector pose + gripper) at ~50 Hz.
+- **Two oracles, by design.** A deterministic Warp state machine
+  ([docs/state-machine.md](docs/state-machine.md)) drives the prehensile pick / barrier tasks; a
+  learned PPO teacher (4096 parallel envs, ~98.5% training success) drives the contact-rich push,
+  which a hand-written controller handles poorly.
 
-### Step 2：轉換為 LeRobot v3.0 格式
+Deep dive: [docs/architecture.md](docs/architecture.md).
 
-```bash
-python data/convert_to_lerobot.py \
-    --input  _out/datasets/franka_pick_demos/dataset.hdf5 \
-    --output _out/datasets/lerobot/franka_pick_place
-# fps 預設讀 HDF5 的 fps attr（collect 寫入＝實際控制頻率 50）；
-# 舊資料集無此 attr → fallback 30 並警告，請改用新版 collect 重收，或手動 --fps 50。
-
-# 不編碼影片（更快，不需要 imageio）
-python data/convert_to_lerobot.py \
-    --input  _out/datasets/franka_pick_demos/dataset.hdf5 \
-    --output _out/datasets/lerobot/franka_pick_place \
-    --no_video
-```
+## Signature case study — diagnosing a 0% push policy
 
-### Step 3：訓練 ACT（IL）
+The same pipeline that yields **90% on barrier** yields **0% on push**: the arm ignores the first box
+and drives diagonally toward the corner from the first step.
 
-ACT 在獨立的 lerobot conda env 訓練（`scripts/train/act.py` 是 `lerobot-train` 的薄 wrapper）：
+<p align="center">
+  <img src="docs/images/act_push_fail.gif" width="320" alt="push policy failing">
+  <img src="docs/images/act_push_fail_no_wrist.gif" width="320" alt="push policy with wrist camera blacked out"><br>
+  <em>Left: the push policy failing. Right: the same scene with the wrist camera blacked out —
+  the rollout still fails; the ablation below quantifies the policy's reliance on the wrist view.</em>
+</p>
 
-```bash
-conda activate lerobot
-# 小量驗證 pipeline（少量 steps、不開 wandb）
-python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier_v0 --smoke
+Per-camera ablation (mean action change when one camera is zeroed):
 
-# 正式訓練 + wandb
-python scripts/train/act.py \
-    --dataset _out/datasets/lerobot/franka_barrier_v0 \
-    --steps 100000 --batch-size 16 --wandb --wandb-project franka-vla
-```
-
-**輸出** → `_out/act/<run>/checkpoints/last/pretrained_model`
+| ablation | push | barrier |
+|---|:---:|:---:|
+| black **overhead** Δ | 0.038 | **0.089** |
+| black **wrist** Δ | **0.197** | 0.027 |
 
-### Step 4：VLA Eval Harness
+Push leans on the fragile, ego-motion-coupled **wrist** camera; barrier leans on the robust static
+**overhead** camera. Teacher-forcing replay (EE-xy L1 = **0.011 m**) proves the model *did* learn the
+demonstrations — so this is a closed-loop **covariate-shift** failure, not under-training. Root cause:
+push initial-state randomization is only **±3 cm** (vs barrier **±13/±7 cm**), which makes the static
+overhead view nearly invariant and uninformative, pushing the policy onto the wrist shortcut. The fix
+widens the randomization ([`scripts/fix_push_widen_dr.sh`](scripts/fix_push_widen_dr.sh), via
+`PUSH_BOX_DR`); the before→after comparison is in progress.
 
-`eval/`（VLAWrapper / obs_adapter / EvalRunner）+ `scripts/eval/policy.py` 提供多環境
-rollout 評估，結果輸出到 `_out/eval/eval_YYYY-MM-DD_HH-MM-SS.json`。
+**Reproduce in ~5 minutes, no simulator:**
 
 ```bash
-# Phase 1：用隨機 dummy policy 驗證 pipeline
-./isaaclab.sh -p scripts/eval/policy.py --policy dummy --num_rollouts 20 --headless --enable_cameras
-
-# 評估訓好的 ACT（in-process；需 eval env 裝 lerobot）
-./isaaclab.sh -p scripts/eval/policy.py --policy act --task barrier \
-    --model_path _out/act/<run>/checkpoints/last/pretrained_model \
-    --num_rollouts 20 --headless --enable_cameras
-#   進階：--n_action_steps 25 提高 replan 頻率；--ablate_camera overhead|wrist 量相機貢獻
-
-# SM oracle baseline 對照（pick/barrier）：--oracle-pose gt|noisy 比較 privileged vs 去 privilege
-./isaaclab.sh -p scripts/eval/policy.py --policy oracle --task barrier --oracle-pose gt \
-    --num_rollouts 20 --headless --enable_cameras
-
-# 評估 fine-tuned OpenVLA
-./isaaclab.sh -p scripts/eval/policy.py --policy openvla \
-    --model_path /path/to/finetuned-openvla --unnorm_key franka_pick \
-    --num_rollouts 50 --num_envs 4 --headless --enable_cameras
+conda activate lerobot
+bash scripts/bootstrap_assets.sh --minimal      # pull the dataset + checkpoint from the HF Hub
+bash experiments/act_push_failure/run_all.sh    # regenerates the ablation evidence
 ```
 
-### （選用）Barrier / two-box 場景
-
-```bash
-# Goal 4：把方塊抓起越過屏障放到另一側，收集 barrier 示範
-./isaaclab.sh -p scripts/collect/demos.py --task barrier --num_demos 50 --headless --enable_cameras
-
-# Goal 1/2：純場景 demo（桌上兩顆方塊，無手臂）
-./isaaclab.sh -p scripts/viz/two_box_table.py
-```
+Full write-up: [docs/case-study-push.md](docs/case-study-push.md).
 
-> **Barrier 越障原理**：屏障是 y=0 的一道靜態牆，高度 ≈ 1/3 手臂高（`BARRIER_HEIGHT`）。
-> 越障靠的是把共用 `PickAndPlaceSm` 的 `lift_height` / `approach_height` 調高
-> （見 `envs/tasks/barrier/collect_env.py` 常數），讓方塊在高處水平橫越屏障，之後才在另一側
-> 垂直下降放置——不需另寫狀態機。屏障變高時，請一併調高這兩個高度以維持越障淨空。
+## Quickstart
 
-### （選用）Goal 3：Push（RL oracle 路徑）
+### 1 · Reproduce the failure analysis (no GPU / simulator, ~5 min)
 
-push 不走 SM，而是先用 PPO 訓一個 push teacher、匯出成 TorchScript 後當 oracle 收 demo。
-box 初始隨機化（DR）由 `PUSH_BOX_DR` 環境變數控制，train / collect / eval 共用同一值：
+See the case-study block above — it runs entirely in the `lerobot` conda env on a published dataset +
+checkpoint.
 
-```bash
-# ① 訓練 PPO push teacher（headless，多 env）
-./isaaclab.sh -p scripts/train/push_rl.py --headless --num_envs 4096
-#   tensorboard --logdir _out/rl/franka_push
+### 2 · Run the full pipeline (requires Isaac Lab)
 
-# ② 匯出最新 checkpoint → policy.pt（jit）
-./isaaclab.sh -p scripts/rl/export_push.py --headless
+sim2act uses two conda envs (full matrix in [CLAUDE.md](CLAUDE.md)):
 
-# （選用）視覺化 / 量成功率
-./isaaclab.sh -p scripts/rl/play_push.py --num_envs 16
+| env | used for |
+|---|---|
+| `isaaclab` (Isaac Sim 5.1) | demo collection · PPO RL · LeRobot conversion · eval |
+| `lerobot` (lerobot 0.5.2) | the simulator-free failure analysis |
 
-# ③ 用 RL oracle 收 push demo → HDF5
-./isaaclab.sh -p scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras
+```bash
+conda run -n isaaclab python -m pip install -e ".[hub]"      # one-time editable install
+
+# collect → convert → train → eval  (barrier shown; see docs/architecture.md for all tasks)
+python scripts/collect/demos.py --task barrier --num_demos 100 --headless --enable_cameras
+python data/convert_to_lerobot.py --input _out/datasets/franka_barrier_official_demos/dataset.hdf5 \
+       --output _out/datasets/lerobot/franka_barrier --state_keys joint_pos,joint_vel --no_depth
+python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier --steps 40000
+python scripts/eval/policy.py --policy act --task barrier \
+       --model_path _out/act/<run>/checkpoints/last/pretrained_model \
+       --num_rollouts 20 --headless --enable_cameras
 ```
 
-### 已知限制：ACT push 相機依賴（已診斷，修復待跑）
+Push uses the RL-teacher chain (`scripts/train/push_rl.py` → `scripts/rl/export_push.py` →
+`scripts/collect/push_rl_demos.py`); see [docs/architecture.md](docs/architecture.md).
 
-push 的 ACT 會學成**幾乎只依賴隨手移動的 wrist 第一人稱相機、幾乎不用靜態 overhead**，
-closed-loop 一偏離 demo 就 OOD → covariate shift（根因＝box init DR 僅 ±3cm 過窄）。
-完整可復現分析見 [`experiments/act_push_failure/`](experiments/act_push_failure/)
-（`README.md` / `REPORT_SECTION.md`，純資料集 + checkpoint、不需啟動 Isaac Sim）：
+## What's inside
 
-```bash
-conda activate lerobot
-bash experiments/act_push_failure/run_all.sh
-```
+| Task | Oracle | Primary camera | Status |
+|---|---|---|---|
+| `pick_place` | Warp state machine | overhead | demos ✅ |
+| `barrier` (pick over a ⅓-arm-height wall) | Warp state machine | overhead (robust) | ACT 90% ✅ |
+| `push` (non-prehensile, box→box→corner) | PPO teacher | wrist → overhead (after fix) | diagnosed, fix in progress 🔬 |
 
-修復已包成一鍵 pipeline `scripts/fix_push_widen_dr.sh`（把 DR 加大到 ±10–13cm 對齊 barrier，
-重訓 PPO→重收→轉檔→重訓 ACT；先 `SPEEDRUN=1` 驗整條通）——**尚未執行**。
+**Sensor suite** (collected for every demo): overhead RGB-D, wrist RGB, joint position/velocity/torque,
+and left/right fingertip contact forces — chosen for realistic sim-to-real transfer.
 
----
+## Repository layout
 
-## State Machine 狀態說明
+```
+envs/          Isaac Lab env configs (base / tasks / scenes; clean inheritance chain)
+controllers/   Warp GPU state machine (PickAndPlaceSm)
+scripts/       pipeline entrypoints — collect / train / eval / rl / viz
+eval/          VLA eval harness (VLAWrapper · EvalRunner · obs adapter · video)
+data/          HDF5 → LeRobot v3.0 conversion
+experiments/   act_push_failure/ — the flagship, simulator-free failure analysis
+tools/         checks / smoke tests / analysis & visualization
+docs/          architecture · case study · results · state machine · archive
+_out/          all generated artifacts (gitignored; published ones are fetched via scripts/bootstrap_assets.sh)
+```
 
-| State | 說明 |
-|-------|------|
-| `REST (0)` | 等待初始化（0.2s） |
-| `APPROACH_ABOVE_OBJECT (1)` | 移至物件正上方 +0.1m |
-| `APPROACH_OBJECT (2)` | 下降至物件位置 |
-| `GRASP_OBJECT (3)` | 閉合夾爪（0.3s） |
-| `LIFT_OBJECT (4)` | 抬起至 z=0.3m（越障時調高） |
-| `MOVE_TO_PLACE (5)` | 移至放置目標正上方 |
-| `PLACE (6)` | 下降至放置位置，開夾爪 |
-| `LIFT_AFTER_PLACE (7)` | 垂直拉高（避免撞到方塊） |
-| `RETREAT (8)` | 退回原點 (0.5, 0, 0.4) |
-| `DONE (9)` | 保持位置，等待 env reset |
+## Roadmap
 
----
+- Run the push DR-widening fix to completion and publish the before→after result.
+- Validate additional policy backends — `OpenVLAWrapper` is wired in `eval/vla_wrapper.py` but
+  **not yet validated**; Octo / π0 are natural next wrappers.
+- Attack covariate shift directly: DAgger / action-noise collection to cover off-trajectory views.
 
-## 知識庫連結
+## Citation · License · Acknowledgements
 
-- 實作細節（Obsidian）：`2_DevOps-Tools/Isaac-Sim/07-FrankaPickVLA/`
-  （env-config / state-machine / demo-pipeline / lerobot-conversion / eval-harness / decision-log）
-- 環境 config 繼承設計：`2_DevOps-Tools/Isaac-Sim/IsaacLab/`（env config 乾淨構築法）
-- 專案框架與決策：`2_DevOps-Tools/Isaac-Sim/Project_Corvinus_Franka/`
+If you use sim2act, please cite it (see [CITATION.cff](CITATION.cff)). Licensed under
+**Apache-2.0** ([LICENSE](LICENSE)). Built on [Isaac Lab](https://github.com/isaac-sim/IsaacLab),
+[LeRobot](https://github.com/huggingface/lerobot), [rsl_rl](https://github.com/leggedrobotics/rsl_rl),
+and the [ACT](https://github.com/tonyzhaozh/act) architecture.
diff --git a/README.zh-Hant.md b/README.zh-Hant.md
new file mode 100644
index 0000000..4c889c7
--- /dev/null
+++ b/README.zh-Hant.md
@@ -0,0 +1,314 @@
+# sim2act — Franka VLA 模擬資料引擎（pick / barrier / push）
+
+> 🇬🇧 English: **[README.md](README.md)**　｜　本檔為繁體中文版（以英文版為準）。
+
+基於 **IsaacLab** 建立的 Franka 操作示範收集與 VLA 訓練資料管線。
+
+> **目標**：Env 設定 → 示範收集（SM oracle / RL oracle）→ LeRobot v3.0 格式 → ACT 訓練 → VLA Eval Harness
+
+---
+
+## 緣起（Robotics & Sim 挑戰）
+
+本專案最初為 **Robotics & Sim（R&S）** 團隊的 Round-2 take-home 挑戰實作（核心命題：sim-to-real
+VLA 資料「**重質不重量**」）；原始題目與書面報告見 [`docs/archive/`](docs/archive/)。下表對應原始 Goals 1–5。
+
+| Goal | 內容 | 本專案實作 | 狀態 |
+|------|------|-----------|:---:|
+| 1 | 桌上放一個 box | `scripts/viz/two_box_table.py`（`envs/scenes/two_box_table_env.py`）| ✅ |
+| 2 | 第二個 box | 同上（場景含兩顆 box）| ✅ |
+| 3 | 用 box1 把 box2 推向桌角 | push RL oracle（`envs/tasks/push/`，`scripts/train/push_rl.py`）| ✅ oracle；ACT 已診斷待修復 |
+| 4 | 1/3 臂高屏障，pick 越障到另一側 | barrier SM（`envs/tasks/barrier/`，調高 lift/approach）| ✅ |
+| 5 | 收集可實際使用的感測資料 | RGB-D + wrist RGB + joint-state + 指尖 contact（`envs/base/`）| ✅（IMU 經分析判為冗餘移除）|
+
+---
+
+## Pipeline 概覽
+
+```
+[ pick_place / barrier ]                         [ push ]
+FrankaBaseEnvCfg + pick_place_cube_sm.py         PPO push teacher（RL oracle）
+(overhead + wrist camera, Warp SM)               scripts/train/push_rl.py → rl/export_push.py
+              │                                              │
+              ▼                                              ▼
+   scripts/collect/demos.py                        scripts/collect/push_rl_demos.py
+   (--task pick_place|barrier)                     (RL policy rollout)
+              │                                              │
+              └────────────────────┬─────────────────────────┘
+                                   ▼
+              _out/datasets/franka_<task>_demos/dataset.hdf5
+              (actions + obs + rgb_overhead/wrist + depth + contact)
+                                   │
+                                   ▼
+                  data/convert_to_lerobot.py → LeRobot v3.0（parquet + mp4）
+                                   │
+                      ┌────────────┴─────────────┐
+                      ▼                           ▼
+            scripts/train/act.py        eval/ + scripts/eval/policy.py
+            (ACT，IL)          ──────▶  (VLA rollout → _out/eval/eval_*.json)
+```
+
+---
+
+## 目錄結構
+
+```
+Isaac_sim/
+├── pyproject.toml                # editable 套件定義（移除 sys.path hack 的關鍵）
+│
+├── envs/                         # 環境 config（base / tasks / scenes 分層）
+│   ├── base/                      # 共用基底（繼承鏈，每層只負責一件事）
+│   │   ├── franka_base_env.py     #   FrankaSensorEnvCfg  (+ overhead RGB-D / wrist RGB)
+│   │   │                          #     └ FrankaPickVLAEvalEnvCfg  (評估分支，無 recorder)
+│   │   │                          #   FrankaBaseEnvCfg    (+ contact 感測 + RecorderManager)
+│   │   │                          #     └ FrankaPickVLAEnvCfg      (pick-and-place)
+│   │   └── recorders.py           #   PreStepStructuredObsRecorder → 結構化 obs/
+│   │                              #     joint_pos/vel/torque, object_pos/quat,
+│   │                              #     rgb_overhead/wrist, depth_overhead, contact_l/rfinger
+│   ├── tasks/                     # 各任務（繼承 base，只加自己的場景/事件）
+│   │   ├── push/                  #   push 走 RL oracle（SM push 已移除）
+│   │   │   ├── __init__.py        #   PUSH_CORNER 桌角目標常數（RL reward/eval 共用）
+│   │   │   ├── eval_env.py        #   FrankaPushVLAEvalEnvCfg（ACT eval）
+│   │   │   └── rl/                #   push RL oracle 子系統（PPO teacher）
+│   │   │       ├── action.py      #     PlanarPushActionCfg (x,y,yaw)
+│   │   │       ├── mdp.py         #     push 專用 obs/reward 函式
+│   │   │       ├── train_env.py   #     FrankaPushRLEnvCfg（4096 env，無相機；box DR 由 PUSH_BOX_DR 控制）
+│   │   │       ├── collect_env.py #     FrankaPushRLCollectEnvCfg（用 policy 收 demo）
+│   │   │       └── ppo_cfg.py     #     PushPPORunnerCfg（rsl_rl 超參）
+│   │   └── barrier/
+│   │       ├── collect_env.py     #   FrankaBarrierEnvCfg + apply_barrier_scene（Goal 4）
+│   │       └── eval_env.py        #   FrankaBarrierVLAEvalEnvCfg（ACT eval）
+│   └── scenes/
+│       └── two_box_table_env.py   #   TwoBoxTableSceneCfg：純場景（桌 + 兩方塊，無手臂，Goal 1/2）
+│
+├── controllers/                  # Warp GPU state machine（機器人無關 kernel）
+│   └── pick_place_cube_sm.py     #   PickAndPlaceSm：REST→…→GRASP→LIFT→…→RETREAT→DONE
+│                                  #   （pick + barrier 共用；push 走 RL，無 SM）
+│
+├── scripts/                      # pipeline entrypoints（依功能分子資料夾）
+│   ├── collect/                  #   收集 demo
+│   │   ├── demos.py              #     SM oracle（--task pick_place|barrier）→ HDF5
+│   │   └── push_rl_demos.py      #     RL oracle（push）→ HDF5
+│   ├── train/                    #   訓練
+│   │   ├── act.py                #     ACT（IL，wrap lerobot-train）
+│   │   └── push_rl.py            #     PPO（push RL teacher，rsl_rl）
+│   ├── eval/                     #   VLA 評估
+│   │   ├── policy.py            #      評估入口（--policy dummy|act|openvla|oracle）
+│   │   ├── oracle_noise_sweep.py #     oracle 位姿噪聲掃描（去 privilege 敏感度）
+│   │   └── record_demos.sh      #      一鍵錄製 in-dist / OOD / oracle 對照影片
+│   ├── rl/                       #   push RL 輔助
+│   │   ├── export_push.py        #     checkpoint → policy.pt（jit）
+│   │   └── play_push.py          #     播放/視覺化訓好的 policy（量成功率）
+│   ├── viz/                      #   場景視覺化 debug（不錄製）
+│   │   ├── scripted_pick_place.py
+│   │   └── two_box_table.py      #     two-box 場景（Goal 1/2）
+│   ├── fix_push_widen_dr.sh      #   push 修復 pipeline：加大 DR→重訓 PPO→重收→轉檔→重訓 ACT
+│   └── recollect_retrain_barrier.sh  # barrier 乾淨重收（無 marker）+ 重訓 ACT
+│
+├── tools/                        # 開發/驗證工具（依功能分子資料夾）
+│   ├── checks/                   #   健全性/資料檢查（env / hdf5_data / push_demos）
+│   ├── smoke/                    #   煙霧測試（planar_push / push_rl_env）
+│   ├── viz/                      #   分析/視覺化（多數純資料集、不需模擬器）
+│   │   ├── eval_ood.py          #      OOD 泛化報告（ACT vs oracle，分區成功率）
+│   │   ├── barrier_init_map.py  #      barrier 初始分佈 × 成功/失敗 俯視圖
+│   │   ├── push_camera_sensitivity.py  # ACT push 各相機敏感度（ablation bar）
+│   │   ├── push_training_curve.py #     PPO push teacher 訓練曲線（success / reward）
+│   │   └── pcd.py / push_demos.py #     點雲 / push demo 視覺化
+│   └── migrate_outputs.sh        #   一次性：舊輸出 → _out/（dry-run 預設）
+│
+├── eval/                         # VLA Eval Harness（可安裝套件）
+│   ├── vla_wrapper.py            #   VLAWrapper (ABC) / Dummy / ACTLocal / OpenVLA / SMOracle
+│   ├── obs_adapter.py            #   Isaac Lab scene state → VLA 輸入 dict（build_obs_dict）
+│   ├── eval_runner.py            #   EvalRunner：多環境 rollout + success latch
+│   └── video_recorder.py         #   三相機（overhead/wrist/side）逐 episode 錄影
+│
+├── data/                         # 資料格式轉換
+│   └── convert_to_lerobot.py     #   HDF5 → LeRobot v3.0（parquet + mp4）
+│
+├── experiments/                  # 非 MVP 探索（可復現分析，不需模擬器）
+│   └── act_push_failure/          #   ACT push 失效根因分析（相機依賴 ablation）
+│       ├── act_camera_ablation.py #     E1 teacher-forcing + E2 逐相機 ablation
+│       ├── run_all.sh             #     一鍵復現（push + barrier 對照 + 影像）
+│       ├── README.md              #     分析說明（實驗 ↔ 推論 ↔ 證據）
+│       ├── REPORT_SECTION.md      #     可直接貼進報告的段落草稿
+│       └── results/               #     證據（log / json / 訓練影像）
+│
+└── _out/                         # ★所有 generated 產物統一根（.gitignore）
+    ├── datasets/                 #   franka_<task>_demos/dataset.hdf5（raw）+ lerobot/（LeRobot v3.0）
+    ├── eval/                     #   eval_*.json 評估結果
+    ├── rl/                       #   rsl_rl 訓練 log + policy.pt / model_*.pt
+    ├── act/                      #   ACT checkpoints
+    ├── viz/                      #   demo gif/影片（含 demos/）
+    └── debug/                    #   相機截圖 / 點雲
+```
+> 舊散落輸出（`datasets/` `results/` `logs/` `outputs/` `viz_demos/` `debug_images/`）整合進 `_out/`；
+> 既有資料用 `bash tools/migrate_outputs.sh --run` 搬移（預設 dry-run 先預覽）。
+
+> Goal 5 感測決策（D4）：**IMU 已移除**（固定基座下，hand 的線加速度/角速度可由 joint-state
+> + FK 完全推得，屬冗餘）。保留的感測 = overhead RGB-D + wrist RGB + joint-state + gripper + 指尖 contact。
+
+---
+
+## 快速上手
+
+### 前置需求
+
+1. Isaac Lab（需先完成安裝，使用 `./isaaclab.sh`）。
+2. **一次性：把本專案裝成 editable 套件**（這樣各 entrypoint 不需 `sys.path` hack 即可 `from envs...`）：
+
+```bash
+./isaaclab.sh -p -m pip install -e .
+# 轉 LeRobot 的相依（通常在獨立 lerobot env）：./isaaclab.sh -p -m pip install -e ".[lerobot]"
+# 點雲視覺化：./isaaclab.sh -p -m pip install -e ".[viz]"
+```
+
+### Step 1：收集示範
+
+```bash
+# 收集 50 個成功 demo（headless + 開相機）
+./isaaclab.sh -p scripts/collect/demos.py --task pick_place --num_demos 50 --headless --enable_cameras
+
+# 快速測試（只收 3 個）
+./isaaclab.sh -p scripts/collect/demos.py --num_demos 3 --headless --enable_cameras
+```
+
+**輸出** → `_out/datasets/franka_pick_demos/dataset.hdf5`
+
+| 資料欄位 | Shape |
+|---------|-------|
+| `actions` | `(T, 8)` arm(7) + gripper(1) |
+| `obs/joint_pos` / `joint_vel` / `joint_torque` | `(T, 9)` |
+| `obs/object_pos` | `(T, 3)` env-local frame |
+| `obs/object_quat` | `(T, 4)` wxyz |
+| `obs/rgb_overhead` / `rgb_wrist` | `(T, 224, 224, 3)` uint8 |
+| `obs/depth_overhead` | `(T, 224, 224, 1)` float32 公尺 |
+| `obs/contact_lfinger` / `contact_rfinger` | `(T, 3)` 淨接觸力 (N) |
+
+### Step 2：轉換為 LeRobot v3.0 格式
+
+```bash
+python data/convert_to_lerobot.py \
+    --input  _out/datasets/franka_pick_demos/dataset.hdf5 \
+    --output _out/datasets/lerobot/franka_pick_place
+# fps 預設讀 HDF5 的 fps attr（collect 寫入＝實際控制頻率 50）；
+# 舊資料集無此 attr → fallback 30 並警告，請改用新版 collect 重收，或手動 --fps 50。
+
+# 不編碼影片（更快，不需要 imageio）
+python data/convert_to_lerobot.py \
+    --input  _out/datasets/franka_pick_demos/dataset.hdf5 \
+    --output _out/datasets/lerobot/franka_pick_place \
+    --no_video
+```
+
+### Step 3：訓練 ACT（IL）
+
+ACT 在獨立的 lerobot conda env 訓練（`scripts/train/act.py` 是 `lerobot-train` 的薄 wrapper）：
+
+```bash
+conda activate lerobot
+# 小量驗證 pipeline（少量 steps、不開 wandb）
+python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier_v0 --smoke
+
+# 正式訓練 + wandb
+python scripts/train/act.py \
+    --dataset _out/datasets/lerobot/franka_barrier_v0 \
+    --steps 100000 --batch-size 16 --wandb --wandb-project franka-vla
+```
+
+**輸出** → `_out/act/<run>/checkpoints/last/pretrained_model`
+
+### Step 4：VLA Eval Harness
+
+`eval/`（VLAWrapper / obs_adapter / EvalRunner）+ `scripts/eval/policy.py` 提供多環境
+rollout 評估，結果輸出到 `_out/eval/eval_YYYY-MM-DD_HH-MM-SS.json`。
+
+```bash
+# Phase 1：用隨機 dummy policy 驗證 pipeline
+./isaaclab.sh -p scripts/eval/policy.py --policy dummy --num_rollouts 20 --headless --enable_cameras
+
+# 評估訓好的 ACT（in-process；需 eval env 裝 lerobot）
+./isaaclab.sh -p scripts/eval/policy.py --policy act --task barrier \
+    --model_path _out/act/<run>/checkpoints/last/pretrained_model \
+    --num_rollouts 20 --headless --enable_cameras
+#   進階：--n_action_steps 25 提高 replan 頻率；--ablate_camera overhead|wrist 量相機貢獻
+
+# SM oracle baseline 對照（pick/barrier）：--oracle-pose gt|noisy 比較 privileged vs 去 privilege
+./isaaclab.sh -p scripts/eval/policy.py --policy oracle --task barrier --oracle-pose gt \
+    --num_rollouts 20 --headless --enable_cameras
+
+# 評估 fine-tuned OpenVLA
+./isaaclab.sh -p scripts/eval/policy.py --policy openvla \
+    --model_path /path/to/finetuned-openvla --unnorm_key franka_pick \
+    --num_rollouts 50 --num_envs 4 --headless --enable_cameras
+```
+
+### （選用）Barrier / two-box 場景
+
+```bash
+# Goal 4：把方塊抓起越過屏障放到另一側，收集 barrier 示範
+./isaaclab.sh -p scripts/collect/demos.py --task barrier --num_demos 50 --headless --enable_cameras
+
+# Goal 1/2：純場景 demo（桌上兩顆方塊，無手臂）
+./isaaclab.sh -p scripts/viz/two_box_table.py
+```
+
+> **Barrier 越障原理**：屏障是 y=0 的一道靜態牆，高度 ≈ 1/3 手臂高（`BARRIER_HEIGHT`）。
+> 越障靠的是把共用 `PickAndPlaceSm` 的 `lift_height` / `approach_height` 調高
+> （見 `envs/tasks/barrier/collect_env.py` 常數），讓方塊在高處水平橫越屏障，之後才在另一側
+> 垂直下降放置——不需另寫狀態機。屏障變高時，請一併調高這兩個高度以維持越障淨空。
+
+### （選用）Goal 3：Push（RL oracle 路徑）
+
+push 不走 SM，而是先用 PPO 訓一個 push teacher、匯出成 TorchScript 後當 oracle 收 demo。
+box 初始隨機化（DR）由 `PUSH_BOX_DR` 環境變數控制，train / collect / eval 共用同一值：
+
+```bash
+# ① 訓練 PPO push teacher（headless，多 env）
+./isaaclab.sh -p scripts/train/push_rl.py --headless --num_envs 4096
+#   tensorboard --logdir _out/rl/franka_push
+
+# ② 匯出最新 checkpoint → policy.pt（jit）
+./isaaclab.sh -p scripts/rl/export_push.py --headless
+
+# （選用）視覺化 / 量成功率
+./isaaclab.sh -p scripts/rl/play_push.py --num_envs 16
+
+# ③ 用 RL oracle 收 push demo → HDF5
+./isaaclab.sh -p scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras
+```
+
+### 已知限制：ACT push 相機依賴（已診斷，修復待跑）
+
+push 的 ACT 會學成**幾乎只依賴隨手移動的 wrist 第一人稱相機、幾乎不用靜態 overhead**，
+closed-loop 一偏離 demo 就 OOD → covariate shift（根因＝box init DR 僅 ±3cm 過窄）。
+完整可復現分析見 [`experiments/act_push_failure/`](experiments/act_push_failure/)
+（`README.md` / `REPORT_SECTION.md`，純資料集 + checkpoint、不需啟動 Isaac Sim）：
+
+```bash
+conda activate lerobot
+bash experiments/act_push_failure/run_all.sh
+```
+
+修復已包成一鍵 pipeline `scripts/fix_push_widen_dr.sh`（把 DR 加大到 ±10–13cm 對齊 barrier，
+重訓 PPO→重收→轉檔→重訓 ACT；先 `SPEEDRUN=1` 驗整條通）——**尚未執行**。
+
+---
+
+## State Machine 狀態說明
+
+| State | 說明 |
+|-------|------|
+| `REST (0)` | 等待初始化（0.2s） |
+| `APPROACH_ABOVE_OBJECT (1)` | 移至物件正上方 +0.1m |
+| `APPROACH_OBJECT (2)` | 下降至物件位置 |
+| `GRASP_OBJECT (3)` | 閉合夾爪（0.3s） |
+| `LIFT_OBJECT (4)` | 抬起至 z=0.3m（越障時調高） |
+| `MOVE_TO_PLACE (5)` | 移至放置目標正上方 |
+| `PLACE (6)` | 下降至放置位置，開夾爪 |
+| `LIFT_AFTER_PLACE (7)` | 垂直拉高（避免撞到方塊） |
+| `RETREAT (8)` | 退回原點 (0.5, 0, 0.4) |
+| `DONE (9)` | 保持位置，等待 env reset |
+
+---
+
+> 完整、最新的英文文件見 [README.md](README.md) 與 [`docs/`](docs/)。
diff --git a/data/convert_to_lerobot.py b/data/convert_to_lerobot.py
index 424e493..a0ec4cd 100644
--- a/data/convert_to_lerobot.py
+++ b/data/convert_to_lerobot.py
@@ -245,7 +245,7 @@ def convert(
     # ── 結束：寫入 per-episode stats，關閉 writer ────────────────────────────
     dataset.finalize()   # v3.0：取代舊的 consolidate(run_compute_stats=True)
 
-    print(f"\n[convert] 完成！")
+    print("\n[convert] 完成！")
     print(f"  Episodes : {n_episodes}")
     print(f"  Output   : {output_dir}")
 
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..491dc8d
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,238 @@
+# sim2act — System Architecture
+
+A deep-dive into the `sim2act` simulation data engine for VLA (vision-language-action) imitation learning on NVIDIA Isaac Lab. The engine drives a Franka Panda arm through three manipulation tasks — **pick**, **barrier** (pick over a wall), and **push** (chained two-cube push to a corner) — and turns simulated rollouts into a trained, closed-loop-evaluable ACT policy.
+
+This document is for foundation-model robotics engineers. It is grounded line-by-line in the source; every claim below points to a concrete class and file. It is intentionally honest about what works and what does not (notably: the push task currently fails closed-loop, and the OpenVLA path is wired but unvalidated).
+
+---
+
+## 1. The data engine, end to end
+
+The pipeline is a linear sequence of stages, each owning one transformation. Two of the stages run a *privileged oracle* whose only job is to produce expert trajectories; everything downstream is sensor-only and oracle-agnostic.
+
+```
+                        sim2act DATA ENGINE
+ ┌───────────────────────────────────────────────────────────────────────────┐
+ │                                                                             │
+ │  (1) SCENE / ENV CONFIG                                                     │
+ │      envs/base/franka_base_env.py                                           │
+ │        FrankaCubeLiftEnvCfg → FrankaSensorEnvCfg → FrankaBaseEnvCfg         │
+ │        + per-task scene (barrier wall / 2nd cube) via task modules          │
+ │            │  Franka + IK-Abs action + overhead RGB-D + wrist RGB           │
+ │            │  + dual fingertip contact + RecorderManager                    │
+ │            ▼                                                                │
+ │  (2) PRIVILEGED ORACLE  ── DUAL DESIGN (§4) ──────────────────────────      │
+ │      ┌─────────────────────────────┐   ┌──────────────────────────────┐    │
+ │      │ pick / barrier:             │   │ push:                        │    │
+ │      │ Warp GPU state machine      │   │ PPO RL teacher (rsl_rl)       │    │
+ │      │ controllers/                │   │ envs/tasks/push/rl/           │    │
+ │      │   pick_place_cube_sm.py     │   │   train_env.py + ppo_cfg.py   │    │
+ │      │   PickAndPlaceSm (10 states)│   │   4096 parallel envs          │    │
+ │      └─────────────┬───────────────┘   └───────────────┬──────────────┘    │
+ │                    │ desired EE pose + gripper (8D)     │ (x,y,yaw)→IK-Abs  │
+ │                    ▼                                    ▼                   │
+ │  (3) DEMO COLLECTION (success-only; cameras ON)                            │
+ │      scripts/collect/demos.py        (SM: pick / barrier)                   │
+ │      scripts/collect/push_rl_demos.py(RL policy replayed w/ sensors)        │
+ │            │  RecorderManager exports EXPORT_SUCCEEDED_ONLY                 │
+ │            ▼                                                                │
+ │  (4) RAW HDF5   _out/datasets/<task>_demos/dataset.hdf5                     │
+ │      obs/ {joint_pos,vel,torque, object_pos/quat, object2_*,               │
+ │            rgb_overhead, rgb_wrist, depth_overhead,                        │
+ │            contact_lfinger/rfinger, ee_pos}   +   actions (T,8 IK-Abs)      │
+ │            │                                                                │
+ │            ▼                                                                │
+ │  (5) LeRobot v3.0 CONVERSION                                               │
+ │      data/convert_to_lerobot.py  (LeRobotDataset.create / add_frame /      │
+ │            save_episode / finalize)                                         │
+ │            │  observation.state (concat), observation.images.{overhead,    │
+ │            │  wrist, depth} (MP4), action (8D), per-frame task string       │
+ │            ▼                                                                │
+ │  (6) ACT IMITATION LEARNING                                               │
+ │      LeRobot ACT, trained via `lerobot-train` (scripts/train/act.py wraps  │
+ │      it); checkpoint dir = pretrained_model                                │
+ │            │  chunked action prediction                                     │
+ │            ▼                                                                │
+ │  (7) CLOSED-LOOP EVAL                                                      │
+ │      scripts/eval/policy.py → eval/eval_runner.py (EvalRunner)             │
+ │            VLAWrapper (eval/vla_wrapper.py): ACTLocal / SMOracle /         │
+ │            Dummy / OpenVLA   ◄── eval/obs_adapter.py builds the obs dict    │
+ │            success-latch → _out/eval/eval_<ts>.json                        │
+ └───────────────────────────────────────────────────────────────────────────┘
+```
+
+Two design invariants hold across the whole pipeline:
+
+- **One action format everywhere.** The recorded action is always an 8D IK-absolute end-effector pose + gripper (`[x,y,z, qw,qx,qy,qz, gripper]`). The push RL teacher internally outputs only `(x, y, yaw)`, but a dedicated recorder re-expands it to the same 8D before it ever touches HDF5 (§4.2). Downstream (LeRobot, ACT, eval) therefore never knows which oracle produced a demo.
+- **One source of truth for sensors.** The camera/contact specs live in exactly one place (`envs/base/franka_base_env.py`) and are inherited by both the collect env and the eval env, so the training and evaluation distributions cannot silently drift (§2).
+
+Two conda environments split the work:
+
+- **`isaaclab`** runs everything that needs the simulator: SM/RL collection, PPO training (`scripts/train/push_rl.py`), LeRobot conversion, ACT training (in the fix pipeline), and closed-loop eval.
+- **`lerobot`** (LeRobot 0.5.2) runs the no-simulator failure analysis (teacher-forcing L1 and camera ablation on the dataset alone — see `experiments/act_push_failure/`).
+
+---
+
+## 2. Env-config inheritance (`envs/base/`)
+
+The config chain is deliberately shallow and each layer adds exactly one concern. From `envs/base/franka_base_env.py`:
+
+```
+FrankaCubeLiftEnvCfg            (Isaac Lab stock: robot + IK-Abs action + lift task)
+   │
+FrankaSensorEnvCfg             + overhead RGB-D + wrist RGB cameras       ← perception layer
+   ├── FrankaPickVLAEvalEnvCfg + shorter episode; NO recorder            ← eval branch
+   │
+FrankaBaseEnvCfg               + dual fingertip contact + RecorderManager ← collection layer
+   ├── FrankaPickVLAEnvCfg     pick-and-place (single cube)
+   └── FrankaBarrierEnvCfg     + barrier wall (envs/tasks/barrier/collect_env.py)
+       (push collect: FrankaPushRLCollectEnvCfg also extends FrankaBaseEnvCfg)
+```
+
+**`FrankaSensorEnvCfg`** is the single source of truth for perception. In `__post_init__` it attaches `overhead_camera` (224×224, RGB + `distance_to_image_plane`, mounted at `pos=(0.5, 0.0, 1.5)` looking straight down) and `wrist_camera` (224×224 RGB, mounted on `panda_hand`). It also disables the stock LiftEnv `object_pose` command debug marker — that marker is a real scene prim and would otherwise be photographed by both cameras, contaminating both collected data and eval imagery. Disabling only `debug_vis` keeps the command itself available for obs/reward terms.
+
+**`FrankaBaseEnvCfg`** is the collection layer. On top of perception it:
+- sets `episode_length_s = 20.0` (≈1000 steps, long enough that the SM finishes before any env timeout/reset),
+- flips `robot.spawn.activate_contact_sensors = True` (the stock Franka cfg defaults this off, which would make every contact reading zero),
+- adds two `ContactSensorCfg` on `panda_leftfinger` / `panda_rightfinger` (net contact force per fingertip),
+- installs an `ActionStateRecorderManagerCfg` with `dataset_export_mode = EXPORT_SUCCEEDED_ONLY` (only successful episodes are written),
+- and replaces the stock flat-obs recorder with `PreStepStructuredObsRecorderCfg` (the structured multimodal recorder, §3).
+
+**`FrankaPickVLAEvalEnvCfg`** is the key asymmetry. Rather than inherit from `FrankaBaseEnvCfg` and *turn off* the recorder, the eval branch forks directly off `FrankaSensorEnvCfg`. It is therefore born without a recorder or contact sensors — there is nothing to disable — while still sharing the exact camera config used during collection. It sets `episode_length_s = 10.0`, chosen as a safety margin over the harness budget (`max_steps=300 × control_dt=0.02s = 6.0s`); the actual episode end is owned by `EvalRunner`, not by the env timeout.
+
+### 2.1 How tasks extend the base, and the eval/collect split
+
+Per-task scene deltas are applied in each task's own module and surfaced through a registry, `envs/tasks/task_presets.py`, which is the single source of truth for *both* collect and eval specs.
+
+- **pick** (`FrankaPickVLAEnvCfg`) is a no-op extension of the base — it reuses the single `scene.object` cube; the fixed place target is passed to the state machine by the collect script, not baked into the env.
+- **barrier** (`envs/tasks/barrier/collect_env.py`) adds a static collision wall and a `+y` cube spawn via `apply_barrier_scene(cfg)`. Crucially, that same function is called by the barrier *eval* env (`FrankaBarrierVLAEvalEnvCfg`, in `envs/tasks/barrier/eval_env.py`), so the wall geometry, spawn point, and init randomization are guaranteed identical between collection and eval. (A prior bug evaluated barrier against the *pick* env — no wall — guaranteeing 0%; the shared-function design exists specifically to prevent that class of drift.)
+- **push** (`envs/tasks/push/eval_env.py`, `FrankaPushVLAEvalEnvCfg`) adds the second cube (`object2`) and pins the gripper closed. Its eval env imports `PushEventCfg` directly from the RL training env so the init-randomization radius (`PUSH_BOX_DR`) is shared by train, collect, and eval from one place.
+
+The registry encodes each task as a `TaskPreset` (collect side) holding an `EvalSpec` (eval side). The lazy `make_env_cfg` / `make_eval_env_cfg` factories defer all env imports until after `AppLauncher` has started Omniverse — a hard ordering constraint noted at the top of the file (env cfg modules must not be imported at module load). `place_target` is defined once per task and shared by collect and eval, eliminating the "place target written in two places" footgun. `compute_success(...)` centralizes the per-task collection success test (`sm_done` for pick, geometric `object_to_target` for barrier).
+
+---
+
+## 3. Observation and action spec
+
+### 3.1 What gets recorded (HDF5 `obs/`)
+
+`PreStepStructuredObsRecorder` (`envs/base/recorders.py`) returns a structured dict each pre-step, which the RecorderManager expands into `obs/<key>`:
+
+| Key | Shape | Meaning |
+|---|---|---|
+| `joint_pos` | (T, 9) | absolute joint angles |
+| `joint_vel` | (T, 9) | joint velocities |
+| `joint_torque` | (T, 9) | applied torque / effort |
+| `object_pos` / `object_quat` | (T, 3) / (T, 4) | primary cube pose, env-local frame, quat wxyz |
+| `ee_pos` | (T, 3) | TCP position (env-local), via `ee_frame` |
+| `rgb_overhead` | (T, 224, 224, 3) | uint8 |
+| `rgb_wrist` | (T, 224, 224, 3) | uint8 |
+| `depth_overhead` | (T, 224, 224, 1) | float32 metres |
+| `contact_lfinger` / `contact_rfinger` | (T, 3) | net fingertip contact force (world frame, N) |
+| `object2_pos` / `object2_quat` | (T, 3) / (T, 4) | second cube — push only, emitted when `object2` exists in the scene |
+
+Sensor summary: **overhead RGB-D (224×224) + wrist RGB (224×224) + joint pos/vel/torque + dual fingertip contact forces.** IMU was deliberately dropped (design note D4): under a fixed base, hand linear/angular acceleration is recoverable from joint state + FK, so it is redundant.
+
+### 3.2 The 8D action
+
+The action is **8-dimensional**: a 7D IK-absolute end-effector pose `[x, y, z, qw, qx, qy, qz]` (env-local) plus 1 gripper command. The downward EE orientation is `wxyz = [0, 1, 0, 0]` (180° about X). Gripper convention: `+1.0 = open`, `-1.0 = close`. Control runs at ~50 Hz (`control_dt = sim.dt × decimation = 0.01 × 2 = 0.02 s`).
+
+For the SM tasks the recorder uses Isaac Lab's stock `PreStepActionsRecorder` (the SM already emits an 8D IK-Abs pose). For push, the RL action is only 3D, so a custom `PreStepPlanarPushActionRecorder` reads the internally-expanded 7D pose (`_pose_command`) from the action term and appends a constant gripper value (`gripper_value`, default `0.0`) to write 8D — keeping the on-disk format identical (§4.2).
+
+### 3.3 The eval-time observation (`eval/obs_adapter.py`)
+
+At eval time, `build_obs_dict(env, task_lang)` reconstructs a live obs dict from the scene *after* `env.step()` (PhysX updates sensor buffers at step end). It returns `joint_pos`, `joint_vel`, `ee_pos`, `ee_quat` (from the `ee_frame` FrameTransformer), `object_pos`, `object_quat`, `rgb_overhead`, `rgb_wrist`, and `task_lang`. `_get_rgb` normalizes TiledCamera output to 3-channel (dropping alpha if the camera yields RGBA). Note that ACT in practice consumes only a lean state slice (`joint_pos`, `joint_vel`) plus the two RGB images — see §6.
+
+---
+
+## 4. The dual-oracle design
+
+The single most important architectural decision is that **the two task families use two different privileged teachers**, chosen by the geometry of the task, not by convenience.
+
+### 4.1 Pick / barrier → Warp GPU state machine
+
+`controllers/pick_place_cube_sm.py` implements `PickAndPlaceSm`, a finite state machine compiled as a **Warp GPU kernel** (`infer_state_machine`) that runs across all parallel envs in lockstep. It has **10 states** (`PickSmState`):
+
+```
+0 REST → 1 APPROACH_ABOVE_OBJECT → 2 APPROACH_OBJECT → 3 GRASP_OBJECT
+→ 4 LIFT_OBJECT → 5 MOVE_TO_PLACE → 6 PLACE → 7 LIFT_AFTER_PLACE
+→ 8 RETREAT → 9 DONE
+```
+
+Each state emits a desired EE transform + a gripper command (`GripperState.OPEN/CLOSE`) and advances when the EE is within `position_threshold` of its target *and* a per-state dwell time (`PickSmWaitTime`) has elapsed. `compute(ee_pose, object_pos, place_pos)` does the wxyz↔xyzw conversions Warp's `wp.transform` expects, launches the kernel, and returns an 8D action (`des_ee_pose` + gripper). `is_done()` reports `sm_state == 9`, which the collect loop uses to latch success.
+
+The state machine is parameterized, not forked: **barrier reuses the very same `PickAndPlaceSm`**, only with raised waypoints. `envs/tasks/barrier/collect_env.py` passes `lift_height=0.50`, `approach_height=0.32`, and `retreat_pos=(0.50, -0.22, 0.45)` so the cube is carried *high over* the wall (wall height `BARRIER_HEIGHT = 0.30 m`, with its top at `z ≈ 0.33` env-local) and only descends vertically on the far side; the retreat point sits high on the placing side to avoid re-crossing the wall.
+
+**Why a scripted oracle here:** pick and barrier are *kinematic* tasks — grasp a rigid body, move it along a collision-free arc, release. The solution is a small fixed sequence of EE waypoints with known geometry. A state machine expresses that exactly, runs at GPU scale with zero training, and is perfectly reproducible. The barrier results bear this out: the resulting ACT student reaches **90% in-distribution success vs the SM oracle's 75%** — i.e. the imitation student *beats its own privileged teacher* (the SM occasionally clips the wall; ACT learned a smoother, more reliable cross). OOD at `init_scale 1.5` drops to **55%**, which is the honest generalization ceiling of the in-distribution demo set.
+
+### 4.2 Push → PPO RL teacher
+
+Push is handled by an RL teacher under `envs/tasks/push/rl/`, trained with **rsl_rl PPO across 4096 parallel envs**.
+
+- **Env** (`train_env.py`, `FrankaPushRLEnvCfg`): swaps the arm action to `PlanarPushAction`, removes the gripper action and pins fingers closed (pure pusher), adds the second cube `object2`, and installs push-specific obs/reward/termination/event managers. Observations are *state-based and camera-free* (`PushObsCfg`: joint pos/vel, privileged box1/box2/EE positions, the constant corner target, last action) so 4096 envs run fast. `num_envs = 4096`, `episode_length_s = 8.0`.
+- **Action** (`action.py`, `PlanarPushAction`): the policy outputs only **3D `(x, y, yaw)`**. The term subclasses Isaac Lab's `DifferentialInverseKinematicsAction` and, in `process_actions`, expands those 3 numbers into a full 7D pose — relative `dx, dy` from the *current* EE pose, `z` locked at `push_height`, orientation fixed downward and rotated by an absolute `yaw` about world-Z — then hands it to the proven differential-IK controller. The minimal action space speeds PPO convergence; computing from the live EE pose every step makes it closed-loop by construction.
+- **Reward** (`mdp.py` + `PushRewardsCfg`): a chained-push shaping stack. Success is *not* "box2 reached the corner" alone — that admits a shortcut (gripper directly shoving box2). It is "box2 in corner **and** box1 still adjacent to box2" (`_success_mask`), plus a penalty on the EE approaching box2 (term `ee_away_from_box2`, which wraps `ee_near_box2` with a negative weight) to forbid bypassing box1. Progress terms are potential-based (velocity toward the corner, no idle baseline).
+- **PPO** (`ppo_cfg.py`, `PushPPORunnerCfg`): actor/critic `[256,128,64]` ELU, `entropy_coef=0.006`, `gamma=0.98`, adaptive-KL LR schedule, `max_iterations=3000`. The original report records **~98.5% training success** for this teacher.
+
+**Why RL here:** push is *contact-rich and non-prehensile*. There is no grasp: the cube is moved by frictional contact, the contact point drifts, and box1 must stay aligned on the box1→corner line while transmitting force to box2. There is no simple closed-form waypoint sequence — the policy must learn a feedback controller over contact dynamics. PPO with dense shaping is the right tool; a scripted SM is not.
+
+**The bridge that keeps the pipeline uniform** (`envs/tasks/push/rl/collect_env.py`, `FrankaPushRLCollectEnvCfg`): collection re-hosts the trained policy on top of `FrankaBaseEnvCfg` (cameras + recorder + contact), reusing the *identical* `PlanarPushAction` and `PushObsCfg` so the exported policy runs natively — zero obs rebuild, zero action recompute. It then swaps in `PreStepPlanarPushActionRecorderCfg` (assigned to `recorders.record_pre_step_actions`), which records the 8D IK-Abs pose the IK layer actually commanded. Result: push demos land on disk in the same 8D format as pick/barrier, and ACT training needs no push-specific code.
+
+### 4.3 Why push currently fails (and the held fix)
+
+Despite the ~98.5% RL teacher and a clean dataset, the **push ACT student scores 0% closed-loop (pre-fix)**. This is a genuine, diagnosed failure, not a bug:
+
+- The model *did* learn the demos: **teacher-forcing EE-xy L1 = 0.011 m**.
+- But it learned a *shortcut*. Camera ablation on the dataset shows the push policy is far more sensitive to the wrist camera than the overhead: **black-wrist Δ = 0.197 vs black-overhead Δ = 0.038**. (Contrast barrier, which relies robustly on the overhead: **black-overhead Δ = 0.089 vs black-wrist Δ = 0.027**.)
+- **Root cause:** the push init randomization is only **±3 cm** (`PUSH_BOX_DR = 0.03`), versus barrier's ±13/±7 cm. With cubes nearly fixed, the static overhead camera is uninformative, so the policy leans on the *moving* wrist camera — a feature that is reliable under teacher forcing but spirals under closed-loop covariate shift.
+- **Fix:** widen the push init DR to ±10–13 cm via the `PUSH_BOX_DR` env var, which `train_env.py` threads through one shared `PushEventCfg` to train/collect/eval, then re-train end to end (`scripts/fix_push_widen_dr.sh`: PPO → export → recollect → convert → ACT).
+
+The after-fix closed-loop number is **_(pending fix run)_** — the retrain is held and no validated post-fix result exists yet.
+
+> **Extension point — OpenVLA / Octo / π0:** `OpenVLAWrapper` (§6) is wired into the eval harness and converts a 7D VLA output to the 8D IK-Abs format, but it is **unvalidated** — no run has confirmed it produces useful actions. Treat it as scaffolding, not a working baseline.
+
+---
+
+## 5. LeRobot v3.0 conversion (`data/convert_to_lerobot.py`)
+
+`convert(...)` turns the RecorderManager HDF5 into a LeRobot v3.0 dataset using the LeRobot API directly (`LeRobotDataset.create` → `add_frame` → `save_episode` → `finalize`).
+
+- **State assembly.** `observation.state` is the concatenation, in fixed order, of whatever `OBS_STATE_KEYS` are present: `joint_pos, joint_vel, joint_torque, object_pos, object_quat, object2_pos, object2_quat, contact_lfinger, contact_rfinger`. `--state_keys` can narrow this (e.g. `joint_pos,joint_vel`) to match the lean eval obs; the per-dimension `names` are generated automatically.
+- **Images → video.** `rgb_overhead` → `observation.images.overhead`, `rgb_wrist` → `observation.images.wrist`, each declared as `dtype="video"` (224×224×3) and encoded to MP4.
+- **Depth.** `depth_overhead` (float32 m) is clamped to `DEPTH_MAX_RANGE=2.0 m`, normalized to a 3-channel uint8 image, and stored as an `observation.images.depth` video. The lossless float depth remains in the HDF5; `--no_depth` skips the depth video (used in the push lean config).
+- **Action.** Written verbatim as the 8D `action` feature, with `names = panda_joint_0..6 + gripper`.
+- **Task string.** v3.0 requires a per-frame `task`; it is attached on every `add_frame` and `save_episode` carries it.
+- **fps.** Defaults to the HDF5 `fps` attr (collect writes the true control rate = 50); legacy datasets without the attr fall back to 30 with a warning.
+- **Safety.** `success_only=True` by default skips any non-success demo (defense against mixed sets, even though collection is already `EXPORT_SUCCEEDED_ONLY`); an existing output dir is removed and rebuilt.
+
+Output layout (`meta/`, `data/chunk-000/`, `videos/chunk-000/observation.images.*`) is generated by the API.
+
+---
+
+## 6. The eval harness
+
+Closed-loop evaluation is a clean separation between *what policy is driving* and *how an episode is scored*.
+
+### 6.1 `VLAWrapper` abstraction (`eval/vla_wrapper.py`)
+
+An abstract base with one required method, `predict_action(obs) -> (N, 8)` IK-Abs, and an optional `reset(env_ids)` for recurrent/queued state. Four implementations:
+
+- **`DummyVLAWrapper`** — random EE pose biased into the reachable workspace, fixed downward orientation, random gripper. Purpose: verify the harness end-to-end; expected ~0% success.
+- **`ACTLocalWrapper`** — loads a LeRobot `ACTPolicy` **in-process** (no socket server; the server fallback was removed). It feeds the lean state (`joint_pos`, `joint_vel`) plus overhead+wrist RGB (permuted to NCHW, `/255`) and returns the post-processed 8D action with the quaternion re-normalized. It supports two operationally important knobs:
+  - `n_action_steps` — an inference-time replan-frequency override. The default equals `chunk_size=100` (≈open-loop); lowering it re-plans more often to fight covariate shift. (Presets set this: barrier `50`, push `50`.)
+  - `ablate_camera` — zero out the overhead or wrist input at inference, the closed-loop counterpart of the dataset ablation that exposed the push shortcut.
+  It also patches a missing top-level `"type":"act"` in some checkpoints' `config.json` (a known cross-version LeRobot loader quirk).
+- **`SMOracleWrapper`** — wraps the *same* `PickAndPlaceSm` (via `preset.make_sm`) as a de-privileged-oracle baseline. `oracle_pose="gt"` feeds ground-truth object pose; `oracle_pose="noisy"` injects a persistent per-episode pose offset (`PoseNoiseCfg`) to emulate perception error. Output is the SM's native 8D, so the runner needs zero changes. (Not available for push — push has no SM.)
+- **`OpenVLAWrapper`** — fine-tuned OpenVLA via HF `AutoModelForVision2Seq`; converts its 7D output `[x,y,z,qx,qy,qz,gripper]` to 8D by recovering `qw` from the unit-quaternion constraint and binarizing the gripper. **Wired but unvalidated** — the documented extension point for OpenVLA / Octo / π0.
+
+### 6.2 `EvalRunner` success-latch (`eval/eval_runner.py`)
+
+The runner steps a (possibly multi-env) Isaac Lab env, queries the policy, and scores with a **geometric success latch** rather than a terminal check — because `GymnasiumEnv` auto-resets the instant an env terminates, so reading the object position *after* a terminal step would read the already-reset state. Instead, `success_latch[i]` is set true if the tracked object (the per-task *target* listed below) ever comes within `success_threshold` of the place target during the episode.
+
+The success predicate is per-task, driven by the `EvalSpec`:
+- pick/barrier: target = `object`, within threshold **and** gripper open (released).
+- push: target = `object2`, within threshold, **and** box1 (`chained_object`) is within `chained_gap` (default 0.12 m) of box2 — computed by `_chained_gap()` on the xy positions — with `require_gripper_open=False` (pusher stays closed).
+
+An episode ends on any of three triggers: env-terminated/truncated (`dones`), our `max_steps` timeout (manual `_reset_idx`), or a success early-stop after `success_hold_steps` consecutive in-target steps (makes `steps` meaningful as steps-to-success). Each episode's cube init position is logged so OOD success can be binned by init geometry. `_finalize` averages steps over *successful* episodes only and writes `_out/eval/eval_<ts>.json`.
+
+Note the deliberate asymmetry in episode budgets: `EvalSpec.max_steps` defaults to 300 but barrier overrides to 400 (its demos run ~290 steps, so 300 would risk a spurious timeout/0%). This is the same source-of-truth registry (`envs/tasks/task_presets.py`) that defines the collection specs — collect and eval read their per-task knobs from one place.
diff --git a/Project-Corvinus-Report.pdf b/docs/archive/Project-Corvinus-Report.pdf
similarity index 100%
rename from Project-Corvinus-Report.pdf
rename to docs/archive/Project-Corvinus-Report.pdf
diff --git a/R&S Challenge.md b/docs/archive/R&S Challenge.md
similarity index 100%
rename from R&S Challenge.md
rename to docs/archive/R&S Challenge.md
diff --git a/docs/archive/README.md b/docs/archive/README.md
new file mode 100644
index 0000000..ec88aa5
--- /dev/null
+++ b/docs/archive/README.md
@@ -0,0 +1,28 @@
+# Archive — original challenge brief & submission
+
+This project began as a take-home challenge for a **Robotics & Sim (R&S)** team building
+**simulation-to-real pipelines for VLA models** in laboratory robotics. The brief's thesis —
+*"the challenge is not the quantity of the data, but the quality"*, in notoriously messy lab
+environments — is exactly the motivation behind the public **sim2act**
+data engine.
+
+These files are kept here for **provenance only**. The main project (repo-root
+[`README.md`](../../README.md)) is the generalized, public version and does not depend on anything
+in this folder.
+
+- [`R&S Challenge.md`](R&S%20Challenge.md) — the original challenge brief (Goals 1–5).
+- [`Project-Corvinus-Report.pdf`](Project-Corvinus-Report.pdf) — the original written submission.
+  ("Corvinus" was the submission codename, now retired in favor of **sim2act**.)
+
+## How the original goals map to the repo
+
+| Goal | Brief | Where in sim2act |
+|---|---|---|
+| 1 | A box on a table | `envs/scenes/two_box_table_env.py` |
+| 2 | A second box | same scene (two boxes) |
+| 3 | Push box-1 into box-2 to move it to a table corner | `envs/tasks/push/` — PPO RL oracle |
+| 4 | Pick a block over a ⅓-arm-height barrier to the other side | `envs/tasks/barrier/` — Warp state machine |
+| 5 | Collect realistically usable sensor data (RGB, tactile, joint-state, …) | `envs/base/` — overhead RGB-D + wrist RGB + joint pos/vel/torque + fingertip contact forces |
+
+> Per the brief, generative-AI use was permitted provided the prompts were disclosed; that
+> disclosure accompanied the original submission.
diff --git a/docs/case-study-push.md b/docs/case-study-push.md
new file mode 100644
index 0000000..72d0d33
--- /dev/null
+++ b/docs/case-study-push.md
@@ -0,0 +1,146 @@
+# Case Study: Why the ACT Push Policy Scored 0% — A Camera-Reliance Shortcut
+
+A post-mortem of a hard, total failure in the sim2act data engine: an ACT (Action Chunking Transformer) policy trained on RL-expert demonstrations that **never touches the box it is supposed to push**, and drives straight for the corner instead. The same perception stack and training pipeline succeed on the `barrier` task, which gives us a clean control group and turns "the model is bad" into a specific, falsifiable root cause.
+
+Every number below is reproducible from `experiments/act_push_failure/` (no simulator required). Sources:
+
+- `experiments/act_push_failure/act_camera_ablation.py` — the diagnostic (E1 teacher-forcing + E2 per-camera ablation).
+- `experiments/act_push_failure/results/push_summary.json`, `experiments/act_push_failure/results/barrier_summary.json` — the evidence.
+- `experiments/act_push_failure/README.md`, `experiments/act_push_failure/REPORT_SECTION.md` — the original analysis (first written in Chinese) that this English case study is promoted from.
+
+---
+
+## 1. Problem
+
+**Task.** Push `box1` into `box2`, then push `box2` into the table corner. The teacher is a PPO policy (privileged state observations, 4096 parallel envs, ~98.5% training success per the original report); we collect successful rollouts as demonstrations and train ACT on the RGB + proprioception observation that a real robot would have.
+
+**Symptom.** The trained ACT push policy has **0% closed-loop success (pre-fix)**. It does not approach `box1` at all. From the very first step the end-effector drives diagonally forward — past `box2` — toward the corner of the table, as if the boxes were not there.
+
+**Why this is alarming.** The exact same sensor suite (overhead RGB-D 224×224 + wrist RGB 224×224 + joint pos/vel/torque + dual fingertip contact forces; 8D action = 7D IK-absolute EE pose + 1 gripper; ~50 Hz control) and the same `collect → LeRobot convert → ACT train → eval` pipeline produce a **working** `barrier` policy that reaches **90% in-distribution success and actually beats its 75% SM oracle teacher** (OOD at `init_scale` 1.5 drops to 55%). So the failure is not in ACT, the wrapper, or the data plumbing in general — it is specific to the push setup. The job of this case study is to localize it.
+
+---
+
+## 2. Root Cause (one paragraph)
+
+The push policy **learned to rely almost entirely on the moving, egocentric wrist camera, and to essentially ignore the static overhead camera.** The wrist view is tightly correlated with the action (the camera rides the end-effector), so it is trivially easy to fit in-distribution and yields a low training loss — but it is fragile. In closed loop, the moment the executed trajectory deviates slightly from the demonstrations, the wrist camera sees a viewpoint that never appeared in training (out-of-distribution); the action error grows, which moves the wrist view further off-distribution, which grows the error again — a textbook **covariate-shift spiral**. The policy collapses back to a vision-independent prior: "drive to the corner."
+
+The upstream cause is the **data distribution, not the model.** Push initialized the boxes with only **±3 cm** of domain randomization (`PUSH_BOX_DR`, default `0.03`, in `envs/tasks/push/rl/train_env.py`). Across every demo the static overhead camera therefore sees almost the same picture, so it carries no discriminative signal about *where* to push. The policy takes the path of least resistance and latches onto the one input that *does* vary with the action — the wrist camera — as a shortcut. Barrier, by contrast, randomizes by **±13 / ±7 cm**, which forces the policy onto the viewpoint-robust overhead camera, and so it generalizes.
+
+---
+
+## 3. Evidence
+
+The analysis runs as two dataset-only experiments (no Isaac Sim), with `barrier` as the control group. Both are produced by a single script, `act_camera_ablation.py`, which loads one demo episode and feeds each recorded frame through **the exact same `ACTLocalWrapper`** used at eval time (same `ACTPolicy`, same pre/post-processors).
+
+### E1 — Teacher-forcing replay (rules out under-training / normalizer / mean-collapse)
+
+Feed the model the recorded ground-truth observation frame by frame and compare its predicted action against the recorded action. If this L1 is small, the inference path is correct and **the model genuinely learned the demonstrations**.
+
+| Metric (push, episode 0, 40 frames) | Value |
+|---|---|
+| Mean L1, all 8 action dims | 0.0087 |
+| **Mean L1, EE-xy** | **0.011 m** |
+
+An end-effector-xy reproduction error of **0.011 m** on teacher-forced demo frames is tight. This eliminates the cheap explanations: it is **not** a LeRobot/normalizer version mismatch, **not** under-training, and **not** mean-collapse (the model is not emitting a constant). The model learned the mapping; the failure must be about *which input it learned to trust* and what happens when that input goes off-distribution in closed loop.
+
+### E2 — Per-camera ablation (locates the shortcut)
+
+For the same frames, black out one camera at a time and measure how far the predicted EE-xy target moves (`|Δ|`, meters). A large shift = the policy depends heavily on that camera.
+
+| Task | Black overhead Δ | Black wrist Δ | Primary reliance | Closed-loop eval |
+|---|:---:|:---:|---|:---:|
+| **push** | 0.038 | **0.197** | **wrist (fragile)** | Fail (0%) |
+| **barrier** | **0.089** | 0.027 | **overhead (robust)** | Success (90%) |
+
+The push numbers are decisive: blacking the **wrist** moves the target by **0.197 m** while blacking the **overhead** barely moves it (**0.038 m**) — a ~5× asymmetry. The push policy is heavily wrist-dependent. (Note this is the *opposite* of causal confusion / ignoring the cameras — the policy over-relies on vision, just on the *wrong* camera.)
+
+**The blacked-wrist target reproduces the failure symptom exactly.** With the wrist camera removed, the push policy's mean EE-xy target collapses to **[0.64, 0.25]** — past `box2`, out toward the corner — the very behavior observed in closed-loop rollouts (`black_wrist_mean_xy` in `push_summary.json`). This closes the causal loop: *wrist view fails ⇒ the exact behavior we see*. In closed loop the wrist view does not need to be blacked out; covariate shift degrades it continuously, with the same effect.
+
+### The barrier control group (why barrier succeeds and push fails)
+
+Barrier is the same code path with a working outcome, and its ablation is **mirror-imaged**: it depends on the **overhead** camera (Δ = 0.089) and is nearly indifferent to the wrist (Δ = 0.027). The overhead camera is static and sees the whole scene, so its view is stable under trajectory deviations — there is no covariate-shift spiral to fall into. This is exactly why barrier generalizes and push does not, and it confirms the difference is the *learned camera reliance*, not anything else in the stack.
+
+### Why the same camera config yields opposite reliance
+
+The two tasks share an identical camera rig; only the data distribution and task geometry differ:
+
+| Factor | push | barrier |
+|---|---|---|
+| Box init DR | **±3 cm** (`PUSH_BOX_DR`, `envs/tasks/push/rl/train_env.py`) | **±13 / ±7 cm** (`envs/tasks/barrier/collect_env.py`) |
+| Demo length × count | ~40 frames × 50 | ~287 frames |
+| Task nature | low, in-contact pushing (wrist sits near the object, highly action-correlated) | pick-and-place (overhead sees the global layout) |
+
+Narrow DR ⇒ the static overhead view is nearly constant across all demos ⇒ no discriminative signal ⇒ the policy grabs the action-correlated wrist camera as a shortcut. (A supporting E3 check dumps the training images and confirms they are not blank — push wrist frames have mean intensity ≈115 — so this is a learned-reliance problem, not corrupt data.)
+
+**Conclusion.** The failure is not model capacity, data quality, or the inference implementation. It is a **learned dependence on a fragile egocentric camera, caused upstream by too-narrow initial-state randomization.**
+
+---
+
+## 4. The Fix
+
+Widen push initial-state randomization to match the barrier scale, regenerate demos with the RL expert (cheap, fully automatic), and retrain ACT. DR is controlled by the `PUSH_BOX_DR` environment variable, which is read by a single `PushEventCfg` shared across train / collect / eval, so one setting stays consistent end-to-end.
+
+```bash
+conda activate isaaclab
+SPEEDRUN=1 ./scripts/fix_push_widen_dr.sh   # verify the whole pipeline first (small run)
+./scripts/fix_push_widen_dr.sh              # full: PUSH_BOX_DR ±10–13 cm, PPO → 50 demos → ACT
+```
+
+The script (`scripts/fix_push_widen_dr.sh`) runs the full pipeline with the box init DR widened to the barrier scale (**±10–13 cm**, vs barrier's ±13/±7 cm). It exports the radius through `PUSH_BOX_DR`; the script's `BOX_DR` default is `0.10` (±10 cm) and is overridable (e.g. `BOX_DR=0.12`). The steps: retrain PPO (4096 envs) → export TorchScript policy → re-collect demos with cameras on → convert to LeRobot → retrain ACT.
+
+**Mechanism.** Wider DR makes the overhead camera vary meaningfully across demos, restoring its discriminative signal, while also broadening wrist-view coverage so off-trajectory viewpoints are less out-of-distribution. Both pressures push the policy off the wrist shortcut.
+
+**Expected primary signal (the falsifiable prediction).** Re-run the *same* `act_camera_ablation.py` on the retrained checkpoint. We expect the reliance to **flip toward overhead** — i.e. the **black-overhead Δ rises** and the **black-wrist Δ falls** — converging toward the barrier-like profile (overhead Δ > wrist Δ). Honest read-out criteria, defined in advance:
+
+- **Success:** black-overhead Δ becomes clearly larger than black-wrist Δ **and** closed-loop push success rises well above 0%.
+- **Partial:** the camera-reliance asymmetry shrinks or flips, but closed-loop success is still low → DR helped but is not sufficient on its own; escalate to the secondary mitigations below.
+- **No change:** ablation profile and 0% success persist → the wrist shortcut is not driven by DR alone; revisit the hypothesis rather than tuning blindly.
+
+**Secondary mitigations, by ROI** (if the DR fix is only partial):
+
+1. Retrain an **overhead-only** variant (drop the wrist camera). If that alone restores closed-loop success, it directly confirms the wrist-dependence diagnosis.
+2. Collect more and longer demos; add wrist-image augmentation (crop / color jitter).
+3. **DAgger** or inject action noise during collection to cover off-trajectory wrist viewpoints — directly attacking the covariate shift.
+
+---
+
+## 5. Before → After
+
+The retrain is intentionally held, so the after-fix column is a placeholder. **No after-fix numbers are fabricated.**
+
+| Signal | Before (±3 cm DR) | After (±10–13 cm DR) |
+|---|:---:|:---:|
+| Black-overhead Δ (m) | 0.038 | _(pending fix run)_ |
+| Black-wrist Δ (m) | 0.197 | _(pending fix run)_ |
+| Primary camera reliance | wrist (fragile) | _(pending fix run)_ |
+| Teacher-forcing EE-xy L1 (m) | 0.011 | _(pending fix run)_ |
+| Closed-loop push success | 0% | _(pending fix run)_ |
+
+Target shape of the "after" column: overhead Δ > wrist Δ (barrier-like), with closed-loop success well above 0%.
+
+---
+
+## 6. Reproduce (~5 min, no simulator)
+
+The entire diagnostic runs off the saved dataset + checkpoint — no Isaac Sim needed. It runs in the `lerobot` conda environment (lerobot 0.5.2), not the `isaaclab` simulation environment used by the fix pipeline in §4.
+
+```bash
+conda activate lerobot
+bash experiments/act_push_failure/run_all.sh
+```
+
+Outputs land in `experiments/act_push_failure/results/`:
+
+- `push_summary.json` / `push_replay.log` — E1 + E2 for push.
+- `barrier_summary.json` / `barrier_replay.log` — the control group.
+- `images/push_{wrist,overhead}_*.png` — dumped training frames (E3).
+
+To validate a retrained checkpoint, point the same script at the new dataset/model (see the eval and ablation commands printed at the end of `scripts/fix_push_widen_dr.sh`) and check whether the camera-reliance asymmetry has flipped.
+
+---
+
+## Appendix — Notes and caveats
+
+- **Numbers** are quoted verbatim from `push_summary.json` / `barrier_summary.json`. The barrier black-overhead Δ is `0.089` (the raw JSON stores `0.08947…`; the original Chinese README/REPORT_SECTION round it to `0.090`).
+- **OpenVLA / Octo / π0** are an extension point in this repo: `OpenVLAWrapper` is wired but **unvalidated** — this case study concerns ACT only and makes no claim about those backbones.
+- **The "after" column is unverified.** Treat the fix as a hypothesis with a pre-registered, falsifiable success criterion, not a settled result.
diff --git a/docs/images/act_barrier_success.gif b/docs/images/act_barrier_success.gif
new file mode 100644
index 0000000..f1888d1
Binary files /dev/null and b/docs/images/act_barrier_success.gif differ
diff --git a/docs/images/act_push_fail.gif b/docs/images/act_push_fail.gif
new file mode 100644
index 0000000..397f81e
Binary files /dev/null and b/docs/images/act_push_fail.gif differ
diff --git a/docs/images/act_push_fail_no_wrist.gif b/docs/images/act_push_fail_no_wrist.gif
new file mode 100644
index 0000000..a6eb22b
Binary files /dev/null and b/docs/images/act_push_fail_no_wrist.gif differ
diff --git a/docs/images/rl_push_suc_16.gif b/docs/images/rl_push_suc_16.gif
new file mode 100644
index 0000000..7cb1f72
Binary files /dev/null and b/docs/images/rl_push_suc_16.gif differ
diff --git a/docs/images/sm_pick_9.gif b/docs/images/sm_pick_9.gif
new file mode 100644
index 0000000..67d0dbc
Binary files /dev/null and b/docs/images/sm_pick_9.gif differ
diff --git a/docs/results.md b/docs/results.md
new file mode 100644
index 0000000..88b649c
--- /dev/null
+++ b/docs/results.md
@@ -0,0 +1,128 @@
+# Results
+
+Consolidated evaluation results for the **sim2act** simulation data engine on NVIDIA Isaac Lab. Three Franka manipulation tasks — `pick`, `barrier` (pick-over-barrier, prehensile), and `push` (box-into-box-to-corner, non-prehensile) — are built on a single pipeline: a **privileged oracle** collects high-quality demonstrations, which an **ACT** (Action Chunking Transformer) policy distils into a camera-only closed-loop controller. The oracle may read ground-truth object pose; the distilled policy sees only the two camera images plus proprioception. That asymmetry is deliberate: the quantity being measured is the visual and spatial robustness that the policy must learn and that the oracle never needed.
+
+> **Headline:** On `barrier`, the distilled student **beats its privileged teacher** — ACT reaches **90%** in-distribution success vs. the state-machine oracle's **75%**. The policy is not merely copying the demonstrations; inside the trained regime it is *more* robust than the controller that generated them. The same pipeline applied to `push` currently scores **0%** (pre-fix). That contrast is what makes this useful as a diagnostic: holding the pipeline fixed and varying only the task isolates *why* distillation succeeds or fails (see [Camera ablation](#3-camera-ablation--what-the-policy-relies-on) and [Root cause](#root-cause-of-the-push-failure)).
+
+All numbers below are measured. The `push` after-fix retrain is held and **not yet run**; rows that would contain after-fix numbers are marked `_(pending fix run)_` and contain no fabricated values.
+
+---
+
+## Setup (common to all tasks)
+
+| Component | Value |
+|---|---|
+| Simulator | NVIDIA Isaac Lab, Franka Panda arm |
+| Sensors | overhead RGB-D (224×224) + wrist RGB (224×224) + joint pos/vel/torque + dual fingertip contact forces |
+| Action space | 8-D: 7-D IK-absolute end-effector pose (`x, y, z, qw, qx, qy, qz`) + 1 gripper |
+| Control rate | ~50 Hz |
+| Distilled policy | ACT (Action Chunking Transformer); inputs = overhead + wrist RGB + proprioception only (no privileged pose) |
+| `barrier` teacher | hand-written Warp state machine (sufficient where physics is stable) |
+| `push` teacher | PPO RL policy (`rsl_rl`), 4096 parallel envs; the push is too physically unstable for an open-loop state machine |
+
+Two conda environments are used: **`isaaclab`** (sim: demo collection, PPO RL, LeRobot conversion, in-sim eval, and ACT training in the fix pipeline) and **`lerobot`** (lerobot 0.5.2; runs the no-simulator failure analysis — teacher-forcing replay and camera ablation, which need only the dataset + checkpoint, no Isaac Sim).
+
+> **Extension point — large VLA backbones (UNVALIDATED).** `OpenVLAWrapper` is wired into the eval harness (`eval/vla_wrapper.py`, selectable via `scripts/eval/policy.py --policy openvla`) as the integration seam for OpenVLA / Octo / π₀-class models. It has **not** been validated end-to-end on these tasks — no success numbers exist for it and none are claimed here. It is a forward hook, not a result.
+
+---
+
+## 1. Per-task success
+
+Success rate of the distilled ACT policy vs. its privileged oracle, in-distribution (training initialization range), unless noted.
+
+| Task | Distilled ACT | Privileged oracle | Notes |
+|---|:---:|:---:|---|
+| **barrier** (pick-over-barrier) | **90%** | 75% (SM oracle) | **Student beats teacher** in-distribution. Replan = 50 actions/chunk (tuned; see below). |
+| **push** (box→box→corner), pre-fix | **0%** | ~98.5% PPO teacher (training success) | Policy skips box-1 and drives diagonally past box-2 toward the table corner — diagnosed covariate-shift failure (see root cause). |
+| **push**, after fix | `_(pending fix run)_` | ~98.5% PPO teacher | Fix = widen `push` init DR; retrain held. No after-fix number is available. |
+| **pick** | demonstrations collected | shared pick-and-place SM | Demonstration set collected; full ACT success eval not reported here. |
+
+Notes:
+- **barrier 90% > 75% is the central finding.** The 75% oracle is a geometric state machine reading ground-truth pose; the 90% ACT sees only cameras. Inside the demonstrated range the end-to-end policy is more robust than the modular pose+planner controller it was distilled from.
+- The **push 0%** is *not* a missing experiment — it is a reproduced, diagnosed failure (the model demonstrably learned the demos; see [§4](#4-teacher-forcing--the-model-learned-the-demos)). The PPO teacher itself solves the task (~98.5% training success across 4096 parallel envs, per the original report), so the data and the oracle are sound; the failure is in what the *vision* policy latched onto.
+- **barrier replan frequency** is an inference-only lever (no retraining): executing 50 actions per replan (vs. the 100-step default chunk, effectively open-loop over a ~287-step episode) is what raises in-distribution success to the 90% operating point.
+
+---
+
+## 2. Barrier — OOD spatial generalization
+
+Because the policy learns the task from images, it should **interpolate** inside the demonstrated initialization range but is not expected to **extrapolate** beyond it. Tested at two initialization scales — the training range (`×1.0`) and that range expanded by 50% to add an unseen outer ring (`×1.5`) — 20 rollouts each, cube-at-rest-within-8 cm success criterion.
+
+| Initialization scale | Coverage | Policy (ACT) | SM oracle (GT pose) |
+|---|---|:---:|:---:|
+| **×1.0** | inside training range | **90%** | 75% |
+| **×1.5** | training range + unseen outer ring | **55%** | 75% |
+
+Interpretation:
+- **The ranking inverts out of distribution.** The ACT policy drops from 90% → 55% while the oracle, a geometric controller reading ground-truth pose, holds a flat 75% because it extrapolates by construction.
+- The drop is a **generalization gap, not an impossible task**: the oracle solves those same outer-ring starts (stable 75% across both scales), so the failures are spatially localized to the unseen ring, and the success/failure boundary tracks the *training box* rather than task difficulty. This is exactly what image-based interpolation predicts.
+- **Fairness caveat.** The 75% baseline here reads *ground-truth* pose, which is immune to the lighting/clutter/pose error a camera-only policy faces. The intended apples-to-apples baseline is a noisy-pose oracle (the same state machine fed an *estimated* pose), so both pipelines see realistic perception; per the original report that comparison is still pending and is not the source of these numbers.
+- Region/initialization analysis and the top-down success/failure map are produced by `tools/viz/eval_ood.py` (training range encoded as `x ∈ [0.37, 0.63]`, `y ∈ [0.18, 0.32]`; pure numpy + matplotlib, runnable without Isaac Lab). The next lever this points to is **wider, uniformly covered data and a pretrained spatial prior**, not more training steps.
+
+---
+
+## 3. Camera ablation — what the policy relies on
+
+Per-camera input ablation run **without** the simulator: for real demonstration frames, blank one camera and measure how much the policy's predicted action changes vs. full observation, `Δ = mean_t ‖a_full(t) − a_blank-cam(t)‖`. A large Δ means the policy depends heavily on that camera. Source: `tools/viz/push_camera_sensitivity.py` (and `experiments/act_push_failure/act_camera_ablation.py`); numbers from `experiments/act_push_failure/results/{push,barrier}_summary.json`.
+
+| Task | Δ blank overhead | Δ blank wrist | Primary reliance | Eval outcome |
+|---|:---:|:---:|---|:---:|
+| **push** | 0.038 | **0.197** | wrist (moving / fragile) | fail (0%) |
+| **barrier** | **0.089** | 0.027 | overhead (static / robust) | success (90%) |
+
+One-line interpretation: **the successful policy leans on the static overhead camera; the failing one leans on the moving wrist camera.** The `push` policy's heavy wrist reliance (Δ 0.197 vs. 0.038) is a *shortcut* — the wrist view is tightly coupled to the action, so it fits easily in-distribution but goes out-of-distribution the instant the closed-loop trajectory drifts. Direct corroboration: when the wrist image is blanked, the push policy's predicted EE target collapses to `[0.64, 0.25]` (past box-2, toward the corner) — i.e. blanking the wrist reproduces the exact observed failure behavior. `barrier`, by contrast, relies on the viewpoint-stable overhead camera (Δ 0.089 vs. 0.027), which is why the same pipeline generalizes there.
+
+---
+
+## 4. Teacher-forcing — the model learned the demos
+
+To rule out a broken inference path, under-training, or mean-collapse, the model is fed the demonstration's ground-truth observations frame-by-frame and its predicted action is compared to the recorded action.
+
+| Task | Teacher-forcing EE-xy L1 |
+|---|:---:|
+| **push** | **0.011 m** |
+| barrier | 0.006 m |
+
+The push EE-xy L1 of **0.011 m** proves the ACT model *did* learn the demonstrations and the inference pipeline is correct. Combined with §3, this is decisive: the push failure is **not** model capacity, data quality, normalization mismatch, under-training, or mean-collapse — it is a closed-loop covariate-shift failure driven by reliance on the fragile egocentric (wrist) camera.
+
+---
+
+## Root cause of the push failure
+
+The diagnosis (`experiments/act_push_failure/`) chains the evidence above into a single mechanism:
+
+1. **Init DR is too narrow.** The `push` box initialization is randomized by only **±3 cm**, vs. **±13 / ±7 cm** for `barrier`.
+2. **The static overhead camera becomes uninformative.** With ±3 cm of variation, the overhead view is nearly identical across every demo, so it carries little signal about *where to push*.
+3. **The policy takes the wrist-camera shortcut.** Lacking a discriminative overhead signal, the policy latches onto the moving wrist camera (which *does* vary with the action) — confirmed by the ablation (Δ 0.197 wrist vs. 0.038 overhead).
+4. **Closed-loop covariate-shift spiral.** At inference the wrist view goes out-of-distribution as soon as the trajectory drifts; errors amplify and the policy regresses to a vision-free prior — skipping box-1 and driving past box-2 toward the table corner.
+
+This is the *same* covariate-shift mechanism quantified by the barrier replan-frequency study, but the contact-rich push has no recovery margin, so it surfaces as total failure rather than a degraded success rate.
+
+### The fix (held, not yet run)
+
+Widen the push init DR to **±10–13 cm** (matching barrier's magnitude) so the static overhead camera regains discriminative power and the wrist-view coverage broadens. DR is plumbed through the `PUSH_BOX_DR` environment variable so collection, training, and eval stay consistent (`envs/tasks/push/rl/train_env.py`). The end-to-end pipeline (re-train PPO → re-collect demos → re-convert to LeRobot → re-train ACT) is packaged as a one-shot script:
+
+```bash
+conda activate isaaclab
+./scripts/fix_push_widen_dr.sh      # widen DR, retrain teacher, recollect, retrain ACT
+```
+
+Because demonstrations are generated automatically by the RL teacher, re-collection is cheap. **The after-fix success number is `_(pending fix run)_`.** The expected verification (per `experiments/act_push_failure/`) is that the camera ablation *inverts* — blank-overhead Δ grows and blank-wrist Δ shrinks — indicating the policy has switched to the robust overhead camera. No after-fix numbers are reported until that run completes.
+
+---
+
+## Reproducing the analysis
+
+The failure analysis is dataset-only (no simulator) and runs in the `lerobot` env in minutes:
+
+```bash
+conda activate lerobot
+bash experiments/act_push_failure/run_all.sh
+```
+
+Outputs land in `experiments/act_push_failure/results/`:
+- `push_summary.json` / `barrier_summary.json` — teacher-forcing L1 + per-camera ablation Δ (the numbers in §3 and §4).
+- `push_replay.log` / `barrier_replay.log` — replay logs.
+- `images/` — dumped training frames (overhead + wrist).
+
+OOD spatial analysis (§2) and the camera-sensitivity plot (§3) are regenerated by `tools/viz/eval_ood.py` and `tools/viz/push_camera_sensitivity.py` respectively.
diff --git a/docs/state-machine.md b/docs/state-machine.md
new file mode 100644
index 0000000..aa02b65
--- /dev/null
+++ b/docs/state-machine.md
@@ -0,0 +1,27 @@
+# State-machine oracle (pick & barrier)
+
+Pick and barrier demonstrations are generated by a privileged **Warp GPU state machine**
+(`controllers/pick_place_cube_sm.py`, class `PickAndPlaceSm`). The kernel `infer_state_machine` runs
+all N parallel environments on the GPU, advancing each independently through 10 states. The output is
+an `(N, 8)` action stream — 7-D IK-absolute end-effector pose (position + quaternion) + 1 gripper command — at ~50 Hz.
+
+| State | Name | What it does |
+|---|---|---|
+| 0 | `REST` | hold at the initial pose (~0.2 s) |
+| 1 | `APPROACH_ABOVE_OBJECT` | move to a point directly above the object |
+| 2 | `APPROACH_OBJECT` | descend onto the object |
+| 3 | `GRASP_OBJECT` | close the gripper |
+| 4 | `LIFT_OBJECT` | lift to `lift_height` (raised for the barrier task) |
+| 5 | `MOVE_TO_PLACE` | translate to above the place target |
+| 6 | `PLACE` | descend and release |
+| 7 | `LIFT_AFTER_PLACE` | retract upward to clear the placed object |
+| 8 | `RETREAT` | return toward the rest pose |
+| 9 | `DONE` | terminal; hold until the env resets |
+
+**Barrier via parameters, not new code.** The barrier task reuses the *same* state machine; clearing a
+⅓-arm-height wall is achieved purely by raising `lift_height` / `approach_height`
+(`envs/tasks/barrier/collect_env.py`) so the block travels horizontally above the barrier before
+descending on the far side. Raise the barrier → raise those two heights to keep clearance.
+
+> Push uses no state machine — it is driven by a learned PPO teacher. See
+> [architecture.md](architecture.md).
diff --git a/envs/base/recorders.py b/envs/base/recorders.py
index 0cc1416..ab354e2 100644
--- a/envs/base/recorders.py
+++ b/envs/base/recorders.py
@@ -22,7 +22,6 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
 
 import torch
 
diff --git a/envs/tasks/push/rl/action.py b/envs/tasks/push/rl/action.py
index 1ace5bb..ca476fe 100644
--- a/envs/tasks/push/rl/action.py
+++ b/envs/tasks/push/rl/action.py
@@ -21,7 +21,6 @@
 
 import torch
 from collections.abc import Sequence
-from dataclasses import MISSING
 
 import isaaclab.utils.math as math_utils
 from isaaclab.utils import configclass
diff --git a/envs/tasks/push/rl/train_env.py b/envs/tasks/push/rl/train_env.py
index 568626e..6c44226 100644
--- a/envs/tasks/push/rl/train_env.py
+++ b/envs/tasks/push/rl/train_env.py
@@ -36,7 +36,6 @@
 )
 
 from envs.tasks.push.rl.action import PlanarPushActionCfg
-from envs.tasks.push import PUSH_CORNER
 import envs.tasks.push.rl.mdp as pmdp
 
 
diff --git a/eval/eval_runner.py b/eval/eval_runner.py
index 1eb837c..552402b 100644
--- a/eval/eval_runner.py
+++ b/eval/eval_runner.py
@@ -291,7 +291,7 @@ def _blank_actions(self) -> torch.Tensor:
     @staticmethod
     def _print_header():
         print(f"\n{'='*60}")
-        print(f"  VLA Eval Harness — FrankaPickVLA")
+        print("  VLA Eval Harness — FrankaPickVLA")
         print(f"{'='*60}\n")
         print(f"  {'Ep':>4}  {'env':>3}  {'result':>6}  "
               f"{'steps':>6}  {'success':>10}  {'reason'}")
diff --git a/eval/vla_wrapper.py b/eval/vla_wrapper.py
index fe712e3..41a3ed5 100644
--- a/eval/vla_wrapper.py
+++ b/eval/vla_wrapper.py
@@ -262,7 +262,6 @@ def __init__(
         )
 
     def predict_action(self, obs: dict) -> torch.Tensor:
-        import numpy as np
         import PIL.Image
 
         N      = obs["joint_pos"].shape[0]
@@ -287,7 +286,7 @@ def predict_action(self, obs: dict) -> torch.Tensor:
         return actions   # (N, 8)
 
     @staticmethod
-    def _to_isaac_action(raw: "np.ndarray") -> torch.Tensor:
+    def _to_isaac_action(raw: "np.ndarray") -> torch.Tensor:  # noqa: F821  (quoted forward-ref)
         """
         7-dim VLA output → 8-dim Isaac Lab IK-Abs.
 
@@ -333,13 +332,13 @@ def __init__(
         place_pos: tuple[float, float, float],
         device: str = "cuda:0",
         oracle_pose: str = "gt",                  # "gt" | "noisy"
-        noise_cfg: "PoseNoiseCfg | None" = None,
+        noise_cfg: "PoseNoiseCfg | None" = None,  # noqa: F821  (quoted forward-ref)
         seed: int | None = 0,
     ):
         if preset.make_sm is None:
             raise ValueError(
-                f"task preset 無 make_sm（push 走 RL 收集，無 SM oracle）→ "
-                f"SMOracleWrapper 目前僅支援 pick_place / barrier"
+                "task preset 無 make_sm（push 走 RL 收集，無 SM oracle）→ "
+                "SMOracleWrapper 目前僅支援 pick_place / barrier"
             )
         from eval.perception.pose_noise import PoseNoiseCfg, sample_episode_offset
 
diff --git a/experiments/act_push_failure/README.md b/experiments/act_push_failure/README.md
index 4ae9481..9d00665 100644
--- a/experiments/act_push_failure/README.md
+++ b/experiments/act_push_failure/README.md
@@ -1,69 +1,79 @@
-# ACT Push 失效分析（可復現實驗包）
+# ACT push failure — a reproducible root-cause analysis
 
-> ACT 在 push 任務「**不推 box1、直接斜前奔 box2/桌角**」的根因分析。
-> 每個推論都對應一份可復現的證據。對照組：同 pipeline 但成功的 barrier 任務。
+> Why the push ACT policy **never touches box-1 and drives diagonally toward box-2 / the table corner**.
+> Every claim below maps to a piece of reproducible evidence. Control group: the *same* pipeline on the
+> barrier task, which succeeds.
 
-## TL;DR 根因
-push policy **學成幾乎只依賴「隨手移動的 wrist 第一人稱相機」、幾乎不用靜態 overhead**。
-肇因是 **box1 init 隨機化只有 ±3cm（過窄）+ demo 短少**。
-closed-loop 時 wrist 視角一偏離 demo 就 OOD → covariate shift 螺旋 → 退回「不靠視覺、奔桌角」的 prior。
-barrier 因 DR 寬（±13/±7cm）、依賴穩健的 overhead，故同 pipeline 卻成功。
+## TL;DR — root cause
 
----
+The push policy learns to rely **almost entirely on the moving, first-person wrist camera and barely uses
+the static overhead camera**. The upstream cause is **box-1 initial randomization of only ±3 cm (too narrow)
+plus short demos**. In closed loop, the moment the wrist view drifts from the demos it is out-of-distribution
+→ a covariate-shift spiral → the policy falls back to a vision-independent "run for the corner" prior.
+Barrier, with wide randomization (±13/±7 cm) and reliance on the robust overhead camera, succeeds under the
+identical pipeline.
+
+## How to reproduce
 
-## 如何復現
 ```bash
-conda activate lerobot           # 需 lerobot 0.5.2（與 eval 同 env）
+conda activate lerobot                          # needs lerobot 0.5.2 (same env as eval)
+bash scripts/bootstrap_assets.sh --minimal      # fetch dataset + checkpoint if not already local
 bash experiments/act_push_failure/run_all.sh
 ```
-產出在 `results/`：
+
+Outputs land in `results/`:
 - `push_replay.log` / `push_summary.json`
-- `barrier_replay.log` / `barrier_summary.json`（對照組）
-- `images/push_{wrist,overhead}_*.png`（訓練影像）
+- `barrier_replay.log` / `barrier_summary.json` (control group)
+- `images/push_{wrist,overhead}_*.png` (training images)
 
-> 純資料集 + checkpoint，**不需啟動 Isaac Sim**，數分鐘可跑完。
+> Pure dataset + checkpoint — **no simulator required**; finishes in a few minutes.
 
----
+## Experiment ↔ inference ↔ evidence
 
-## 實驗 ↔ 推論 ↔ 證據 對照表
+| Experiment | What it does | Evidence (push) | Conclusion |
+|---|---|---|---|
+| **E1 Teacher-forcing** | feed ground-truth demo obs frame-by-frame; compare predicted vs recorded action | EE-xy L1 = **0.011 m**; overall 0.0087 | ✅ pipeline correct, the model *did* learn → rules out version/normalizer mismatch, under-training, mean-collapse |
+| **E2 Per-camera ablation** | black out overhead / wrist separately | black-wrist Δ = **0.197**; black-overhead Δ = **0.038** | push relies **heavily on wrist**; rules out causal confusion (the policy over-relies on vision rather than ignoring it) |
+| **E2 where it goes** | where the policy drives when a camera fails | black-wrist → EE stalls at **[0.64, 0.25]** (past box-2, toward the corner) | exactly reproduces the failure symptom → "wrist failure ⇒ observed behavior" |
+| **Control: barrier** | same script on the successful barrier task | black-overhead Δ = **0.089**; black-wrist Δ = **0.027** | barrier relies on the **robust overhead** → explains why barrier succeeds and push fails |
+| **E3 dump images** | dump training wrist/overhead frames | wrist frames are non-black, genuine first-person view | training images are fine; the wrist view moves with the EE → fragile viewpoint |
 
-| 實驗 | 做什麼 | 證據（push） | 排除/支持的結論 |
-|------|--------|-------------|----------------|
-| **E1 Teacher-forcing** | 逐幀餵 demo 真值 obs → 比 pred vs 記錄 action | EE xy L1 = **0.011 m**、整體 0.0087 | ✅ pipeline 正確、模型有學會 → 排除 **版本/normalizer 不一致、欠訓、mean-collapse** |
-| **E2 逐相機 ablation** | 分別塗黑 overhead / wrist | 黑 wrist Δ=**0.197**；黑 overhead Δ=**0.038** | push **重度依賴 wrist**；排除 **causal confusion（忽略影像）**（反而過度依賴影像）|
-| **E2 黑影像走向** | 看影像失效時 policy 往哪 | 黑 wrist → EE 停在 **[0.64, 0.25]**（越過 box2 奔桌角）| 完美**複製失敗症狀** → 證明「wrist 失效 ⇒ 觀察到的行為」 |
-| **對照組 barrier** | 同腳本跑成功的 barrier | 黑 overhead Δ=**0.090**；黑 wrist Δ=**0.027** | barrier 依賴**穩健 overhead** → 解釋為何 barrier 成功、push 失敗 |
-| **E3 dump 影像** | dump 訓練 wrist/overhead | wrist mean≈115（非黑）、為第一人稱視角 | 訓練影像正常；wrist 隨 EE 移動 → 視角脆弱 |
+### Key numbers side by side (push vs barrier)
 
-### 關鍵數字並排（push vs barrier）
-| | 黑 overhead Δ | 黑 wrist Δ | 主要依賴 | eval 結果 |
+| | black-overhead Δ | black-wrist Δ | primary reliance | eval result |
 |---|:---:|:---:|---|:---:|
-| **push** | 0.038 | **0.197** | wrist（脆弱） | ❌ |
-| **barrier** | **0.090** | 0.027 | overhead（穩健） | ✅ |
-
----
-
-## 為什麼 push 押 wrist、barrier 押 overhead（相機 config 完全相同）
-| 因素 | push | barrier |
-|------|------|---------|
-| box init DR | **±3cm**（`envs/tasks/push/rl/train_env.py::PushEventCfg.reset_box1`）| **±13/±7cm**（`envs/tasks/barrier/collect_env.py::BARRIER_INIT_POSE_RANGE`）|
-| demo 長度 × 數量 | ~40 frame × 50 | ~287 frame |
-| 任務性質 | 低高度接觸推桿（wrist 貼近物體、與動作高度相關）| pick-place（overhead 看全局）|
-
-DR 太窄 → 靜態 overhead 視野在所有 demo 幾乎不變、無區辨力 → policy 改抓「會隨動作變化」的 wrist 當捷徑。
-
----
-
-## 潛在解決方法（按 ROI）
-1. **加大 push 收集 DR**：`PushEventCfg.reset_box1/reset_box2` 的 `pose_range` ±3cm → **±10~13cm**（對齊 barrier），用 RL expert 重收 demo（成本低）再重訓。
-   → 已包成一鍵 pipeline：**`scripts/fix_push_widen_dr.sh`**（DR 由 `PUSH_BOX_DR` 環境變數控制；train/collect/eval 一致）。
-   先 `SPEEDRUN=1` 驗整條通，再跑完整版。跑完用本資料夾的 `act_camera_ablation.py` 驗證相機依賴是否反轉。
-2. **只留 overhead、拿掉 wrist** 重訓一版做快速驗證（若可跑 → 100% 坐實 wrist 依賴診斷）。
-3. 收更多、更長 demo；wrist 影像增強（crop/color jitter）。
-4. DAgger / 收集時加 action noise → 覆蓋偏離軌跡的 wrist 視角，直接對治 covariate shift。
-
-## 檔案
-- `act_camera_ablation.py` — 主診斷（E1 teacher-forcing + E2 逐相機 ablation），輸出 JSON。
-- `dump_obs_images.py` — dump 訓練影像（E3）。
-- `run_all.sh` — 一鍵復現 push + barrier + 影像。
-- `results/` — 證據（log / json / png）。
+| **push** | 0.038 | **0.197** | wrist (fragile) | ❌ |
+| **barrier** | **0.089** | 0.027 | overhead (robust) | ✅ |
+
+## Why push leans on wrist while barrier leans on overhead (identical camera config)
+
+| Factor | push | barrier |
+|---|---|---|
+| box init DR | **±3 cm** (`envs/tasks/push/rl/train_env.py`) | **±13/±7 cm** (`envs/tasks/barrier/collect_env.py`) |
+| demo length × count | ~40 frames × 50 | ~287 frames |
+| task nature | low-height contact push (wrist stays close to the object, strongly action-correlated) | pick-place (overhead sees the whole workspace) |
+
+Too-narrow DR → the static overhead view barely changes across demos → no discriminative signal → the policy
+latches onto the wrist camera (which *does* change with the action) as a shortcut.
+
+## Potential fixes (by ROI)
+
+1. **Widen push-collection DR**: raise the box init `pose_range` from ±3 cm to **±10–13 cm** (aligning with
+   barrier), re-collect demos with the RL expert (cheap), and retrain. Packaged as one command —
+   **[`scripts/fix_push_widen_dr.sh`](../../scripts/fix_push_widen_dr.sh)** (DR via the `PUSH_BOX_DR` env var;
+   train / collect / eval stay consistent). Validate by re-running `act_camera_ablation.py` and checking that
+   the camera reliance flips.
+2. **Force the robust view**: retrain overhead-only (drop wrist) as a fast confirmation of the diagnosis.
+3. Collect more / longer demos; augment the wrist image (crop / color jitter).
+4. **DAgger / action-noise during collection** → cover off-trajectory wrist views, directly attacking the
+   covariate shift.
+
+## Files
+
+- `act_camera_ablation.py` — the main diagnostic (E1 teacher-forcing + E2 per-camera ablation); writes JSON.
+- `dump_obs_images.py` — dump training images (E3).
+- `run_all.sh` — one-command reproduction (push + barrier control + images).
+- `results/` — the evidence (logs / JSON / PNGs).
+
+> A narrative write-up of this analysis (with the before→after fix) lives in
+> [`docs/case-study-push.md`](../../docs/case-study-push.md).
diff --git a/experiments/act_push_failure/REPORT_SECTION.md b/experiments/act_push_failure/REPORT_SECTION.md
deleted file mode 100644
index 50caefe..0000000
--- a/experiments/act_push_failure/REPORT_SECTION.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# 報告段落草稿：ACT Push 失效分析
-
-> 可直接貼進報告。建議放在 Results/Challenges 章節。引用的數字皆出自
-> `experiments/act_push_failure/results/`，可復現。
-
----
-
-## Challenge: Camera-Reliance Shortcut in ACT Push Policy
-
-### Problem（問題）
-以 RL expert 蒐集的 demo 訓練 ACT 執行 push 任務（將 box1 推向 box2、再把 box2 推到桌角）時，
-學成的 policy **完全無法執行**：機械臂不去接觸 box1，而是從第一步就直接朝斜前方的 box2／桌角移動。
-相同的感知與訓練 pipeline 在 barrier（越障 pick-and-place）任務上卻能正常運作。
-
-### Root Cause（根因）
-push policy **學成幾乎只依賴隨末端執行器移動的 wrist（第一人稱）相機，而幾乎不使用靜態的 overhead 相機**。
-wrist 視角與動作高度耦合，在訓練分佈內極易擬合（low loss），但在 closed-loop 推論時，
-末端軌跡一旦稍微偏離 demo，wrist 看到的就是訓練中未見過的視角（out-of-distribution），
-導致動作誤差被放大、形成 covariate-shift 螺旋，最終退化成「不依賴視覺、直奔桌角」的先驗行為。
-
-此相機依賴偏差源於資料分佈：push 的 box 初始隨機化僅 **±3 cm**，使靜態 overhead 相機的畫面
-在所有 demo 中幾乎不變、對「往哪推」缺乏區辨力，policy 因而轉向「會隨動作變化」的 wrist 相機作為捷徑。
-相對地，barrier 的初始隨機化達 **±13 / ±7 cm**，迫使 policy 使用視角穩健的 overhead 相機，故得以泛化成功。
-
-### Analysis & Evidence（分析與證據）
-分析以兩個純資料集實驗完成（不需模擬器），並以成功的 barrier 任務作為對照：
-
-1. **Teacher-forcing replay**：逐幀餵入 demo 的真值觀測，模型重現記錄動作的末端位置誤差僅
-   **0.011 m**（整體 L1 0.0087）。證明推論 pipeline 正確、模型確實學會了動作，
-   排除了版本／正規化不一致、欠訓、mean-collapse 等假設。
-2. **逐相機影像 ablation**：對同一幀分別將相機影像塗黑，量測預測位移——
-
-   | 任務 | 塗黑 overhead | 塗黑 wrist | 主要依賴 | eval 結果 |
-   |------|:---:|:---:|---|:---:|
-   | push | 0.038 | **0.197** | wrist（脆弱） | 失敗 |
-   | barrier | **0.090** | 0.027 | overhead（穩健） | 成功 |
-
-   塗黑 wrist 時，push policy 的末端目標退化到 **[0.64, 0.25]**（越過 box2 奔向桌角），
-   與實際失效行為完全一致，直接證明「wrist 失效 ⇒ 觀察到的症狀」。
-
-綜合：失效並非來自模型容量、資料品質或推論實作，而是 **policy 對脆弱的 egocentric 相機形成依賴捷徑**，
-其上游成因為**過窄的初始狀態隨機化**。
-
-### Potential Solutions（潛在解決方法）
-1. **加大初始隨機化**：將 push 的 box 初始 DR 由 ±3 cm 提高至 ±10–13 cm（對齊 barrier），
-   使 overhead 相機重獲區辨力、並擴大 wrist 視角覆蓋；由於 demo 由 RL expert 自動產生，重蒐集成本極低。
-2. **強制使用穩健視角**：移除 wrist、僅保留 overhead 重訓作為快速驗證；或對 wrist 影像做資料增強。
-3. **覆蓋離分佈狀態**：以 DAgger 或在蒐集時注入動作雜訊，讓 demo 涵蓋偏離軌跡的視角，直接緩解 covariate shift。
-
-> 復現：`bash experiments/act_push_failure/run_all.sh`（詳見該資料夾 README）。
diff --git a/experiments/act_push_failure/results/barrier_summary.json b/experiments/act_push_failure/results/barrier_summary.json
index b8aa749..ff4c6a4 100644
--- a/experiments/act_push_failure/results/barrier_summary.json
+++ b/experiments/act_push_failure/results/barrier_summary.json
@@ -3,7 +3,7 @@
   "repo_id": "franka_barrier_v1",
   "episode": 0,
   "n_frames": 287,
-  "model_path": "/home/kevin786/Workspace/Project/Isaac_sim/_out/act/act_franka_barrier_v1_run_20260613_170049/checkpoints/last/pretrained_model",
+  "model_path": "_out/act/act_franka_barrier_v1/checkpoints/last/pretrained_model",
   "E1_teacher_forcing": {
     "mean_l1_all": 0.006865891627967358,
     "mean_l1_ee_xy": 0.006371738389134407,
diff --git a/experiments/act_push_failure/results/push_summary.json b/experiments/act_push_failure/results/push_summary.json
index c39df00..a89ed92 100644
--- a/experiments/act_push_failure/results/push_summary.json
+++ b/experiments/act_push_failure/results/push_summary.json
@@ -3,7 +3,7 @@
   "repo_id": "franka_push_rl_v1",
   "episode": 0,
   "n_frames": 40,
-  "model_path": "/home/kevin786/Workspace/Project/Isaac_sim/_out/act/act_franka_push_rl_v1_run_20260613_170903/checkpoints/last/pretrained_model",
+  "model_path": "_out/act/act_franka_push_rl_v1/checkpoints/last/pretrained_model",
   "E1_teacher_forcing": {
     "mean_l1_all": 0.008742369711399078,
     "mean_l1_ee_xy": 0.01095487643033266,
diff --git a/experiments/act_push_failure/run_all.sh b/experiments/act_push_failure/run_all.sh
index b0fad89..42daa91 100644
--- a/experiments/act_push_failure/run_all.sh
+++ b/experiments/act_push_failure/run_all.sh
@@ -16,10 +16,17 @@ PROJ="$(cd "$HERE/../.." && pwd)"          # Isaac_sim 專案根
 RES="$HERE/results"
 mkdir -p "$RES/images"
 
+# Resolve an ACT checkpoint dir by tag: prefer the canonical bootstrap path (_out/act/act_<tag>/...),
+# else the last act_<tag>_run_* dir in sort order (newest timestamp); prints nothing if neither exists.
+resolve_ck() {  # $1 = tag
+  local canon="$PROJ/_out/act/act_$1/checkpoints/last/pretrained_model"
+  if [ -d "$canon" ]; then echo "$canon"; return; fi
+  ls -d "$PROJ"/_out/act/act_"$1"_run_*/checkpoints/last/pretrained_model 2>/dev/null | sort | tail -1
+}
 PUSH_DS="$PROJ/_out/datasets/lerobot/franka_push_rl_v1"
-PUSH_CK="$PROJ/_out/act/act_franka_push_rl_v1_run_20260613_170903/checkpoints/last/pretrained_model"
+PUSH_CK="$(resolve_ck franka_push_rl_v1)"
 BAR_DS="$PROJ/_out/datasets/lerobot/franka_barrier_v1"
-BAR_CK="$PROJ/_out/act/act_franka_barrier_v1_run_20260613_170049/checkpoints/last/pretrained_model"
+BAR_CK="$(resolve_ck franka_barrier_v1)"
 
 echo "######## (E1+E2) PUSH ########"
 python "$HERE/act_camera_ablation.py" --tag push \
diff --git a/pyproject.toml b/pyproject.toml
index b03d563..fe440e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,25 +3,38 @@ requires = ["setuptools>=61"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "isaac-sim-corvinus"
+name = "sim2act"
 version = "0.1.0"
-description = "Franka pick / push / barrier VLA demo-collection + eval pipeline on Isaac Lab"
+description = "sim2act — a VLA simulation data engine: Franka pick / push / barrier demo collection, LeRobot v3.0 conversion, ACT training + closed-loop eval on Isaac Lab"
 requires-python = ">=3.10"
-# 註：isaaclab / isaacsim / warp / torch 由 Isaac Lab 環境提供，不在此宣告。
-# 以下為純資料管線（在 isaaclab python 內）會用到的輕量相依。
+# Note: isaaclab / isaacsim / warp / torch are provided by the Isaac Lab environment and are
+# intentionally NOT declared here. Below are the lightweight deps the pure data pipeline needs
+# (importable from inside the isaaclab python).
 dependencies = [
     "h5py",
     "numpy",
 ]
 
 [project.optional-dependencies]
-# LeRobot v3.0 轉換（通常在獨立的 lerobot conda env 執行）
+# LeRobot v3.0 conversion + ACT training helpers (usually run in a separate `lerobot` conda env; lerobot itself is installed separately).
 lerobot = ["pandas", "pyarrow", "imageio[ffmpeg]"]
-# 點雲視覺化工具
+# Point-cloud visualization tooling.
 viz = ["open3d"]
+# Asset bootstrap (download datasets/checkpoints from the Hugging Face Hub).
+hub = ["huggingface_hub"]
 
 [tool.setuptools.packages.find]
-# 自動探索（含子套件 envs.base / envs.tasks.push.rl / envs.scenes ...）。
-# 安裝後 `from envs...` / `from eval...` 等不再需要 sys.path hack；
-# 之後新增 task 子資料夾會被自動收錄，不必再手動維護清單。
-include = ["envs*", "controllers*", "eval*", "data*", "experimental*"]
+# Auto-discover (incl. sub-packages envs.base / envs.tasks.push.rl / envs.scenes ...).
+# After `pip install -e .`, `from envs...` / `from eval...` no longer need a sys.path hack;
+# new task sub-folders are picked up automatically — no manual list to maintain.
+include = ["envs*", "controllers*", "eval*", "data*"]
+
+[tool.ruff]
+line-length = 120
+target-version = "py310"
+
+[tool.ruff.lint]
+# Light but real: catch genuine bugs (pyflakes F) and syntax errors (E9) without
+# reformatting research code or churning on whitespace/line-length.
+select = ["E9", "F"]
+ignore = []
diff --git a/scripts/bootstrap_assets.sh b/scripts/bootstrap_assets.sh
new file mode 100644
index 0000000..f885361
--- /dev/null
+++ b/scripts/bootstrap_assets.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# scripts/bootstrap_assets.sh
+# ──────────────────────────────────────────────────────────────────────────────
+# Download the datasets + checkpoints needed to run sim2act without regenerating
+# them in-sim. Everything lands under _out/ (gitignored).
+#
+# Requires the `hub` extra (provides the `hf` CLI / huggingface_hub):
+#   pip install -e ".[hub]"
+#
+# Usage:
+#   bash scripts/bootstrap_assets.sh --minimal   # just enough for the no-sim failure analysis
+#   bash scripts/bootstrap_assets.sh --all        # all published datasets + checkpoints
+#
+# Env:
+#   HF_NAMESPACE   Hugging Face user/org hosting the assets (default: CLM0215)
+# ──────────────────────────────────────────────────────────────────────────────
+set -euo pipefail
+REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$REPO"
+
+HF_NAMESPACE="${HF_NAMESPACE:-CLM0215}"   # Hub user/org that hosts the assets (overridable via env)
+MODE="${1:---minimal}"                    # first CLI arg; defaults to --minimal
+case "$MODE" in --minimal|--all) ;; *) echo "❌ unknown mode '$MODE' (use --minimal or --all)" >&2; exit 2 ;; esac
+command -v hf >/dev/null 2>&1 || { echo "❌ 'hf' not found. Install: pip install -e \".[hub]\"" >&2; exit 1; }
+
+dl() {  # $1 = repo name (under $HF_NAMESPACE)   $2 = repo type: dataset|model   $3 = local dest dir
+  echo "↓ $2  ${HF_NAMESPACE}/$1  →  $3"
+  hf download "${HF_NAMESPACE}/$1" --repo-type "$2" --local-dir "$3" >/dev/null   # discard hf's stdout; stderr still shows
+}
+
+echo "Bootstrapping assets (namespace: ${HF_NAMESPACE}, mode: ${MODE})"
+
+# ── minimal: the simulator-free failure analysis (experiments/act_push_failure/run_all.sh) ──
+dl franka-push-rl-v1 dataset _out/datasets/lerobot/franka_push_rl_v1
+dl act-push-rl-v1    model   _out/act/act_franka_push_rl_v1/checkpoints/last/pretrained_model
+dl franka-barrier-v1 dataset _out/datasets/lerobot/franka_barrier_v1
+dl act-barrier-v1    model   _out/act/act_franka_barrier_v1/checkpoints/last/pretrained_model
+
+if [ "$MODE" = "--all" ]; then
+  dl franka-pick-place dataset _out/datasets/lerobot/frank_pick_place   # NOTE(review): "frank_" (not "franka_" like the other dirs) — confirm
+  dl ppo-push-teacher  model   _out/rl/franka_push/exported
+  # (push v2 / after-fix assets are added here once the fix run is published.)
+fi
+
+echo "✅ Bootstrap complete."
+echo "   Next: conda activate lerobot && bash experiments/act_push_failure/run_all.sh"
diff --git a/scripts/collect/demos.py b/scripts/collect/demos.py
index 5ddc176..3f31f61 100644
--- a/scripts/collect/demos.py
+++ b/scripts/collect/demos.py
@@ -175,7 +175,7 @@ def main():
     # ── 完成 ──────────────────────────────────────────────────────────────────
     total_recorded = env.unwrapped.recorder_manager.exported_successful_episode_count
     print(f"\n{'='*55}")
-    print(f"[collect_demos] 完成！")
+    print("[collect_demos] 完成！")
     print(f"  成功 demo: {total_recorded}")
     print(f"  總 episodes: {n_episodes}")
     print(f"  成功率: {total_recorded / max(n_episodes, 1):.1%}")
diff --git a/scripts/eval/oracle_noise_sweep.py b/scripts/eval/oracle_noise_sweep.py
index da34101..ab8ca24 100644
--- a/scripts/eval/oracle_noise_sweep.py
+++ b/scripts/eval/oracle_noise_sweep.py
@@ -8,7 +8,7 @@
 #       每個 σ 重建一個 SMOracleWrapper（新 SM + 新 per-episode 偏移），跑 EvalRunner。
 #
 # 用法：
-#   conda activate isaaclab && cd /home/kevin786/Workspace/Project/Isaac_sim
+#   conda activate isaaclab && cd /path/to/sim2act
 #   python scripts/eval/oracle_noise_sweep.py --task barrier \
 #       --sweep_axis xy --num_rollouts 40 --num_envs 4 --headless --enable_cameras
 #
@@ -150,7 +150,7 @@ def main():
 
     print(f"\n{'='*60}")
     print(f"  Sweep done → {stem}.csv / .json")
-    print(f"  σ(mm) → success:")
+    print("  σ(mm) → success:")
     for r in rows:
         print(f"    {r['sigma_m']*1000:5.1f}  {r['success_rate']:.1%}")
     print(f"{'='*60}\n")
diff --git a/scripts/eval/policy.py b/scripts/eval/policy.py
index 8d94fba..ca60341 100644
--- a/scripts/eval/policy.py
+++ b/scripts/eval/policy.py
@@ -136,7 +136,7 @@
 from envs.tasks.task_presets import get_preset   # per-task eval 場景 + 成功規格（單一事實來源）
 
 
-def _make_policy(args, preset=None, dt=None, num_envs=None, place_pos=None) -> "VLAWrapper":
+def _make_policy(args, preset=None, dt=None, num_envs=None, place_pos=None) -> "VLAWrapper":  # noqa: F821
     if args.policy == "dummy":
         return DummyVLAWrapper(device=args.device)
 
diff --git a/scripts/eval/record_demos.sh b/scripts/eval/record_demos.sh
index 148be8a..3939518 100755
--- a/scripts/eval/record_demos.sh
+++ b/scripts/eval/record_demos.sh
@@ -23,7 +23,7 @@
 # ──────────────────────────────────────────────────────────────────────────────
 set -euo pipefail
 
-REPO="/home/kevin786/Workspace/Project/Isaac_sim"
+REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"   # repo root (this script lives in scripts/eval/)
 cd "$REPO"
 
 CKPT="${1:-${CKPT:-}}"
diff --git a/scripts/fix_push_widen_dr.sh b/scripts/fix_push_widen_dr.sh
index 307f14d..b885af1 100755
--- a/scripts/fix_push_widen_dr.sh
+++ b/scripts/fix_push_widen_dr.sh
@@ -34,7 +34,7 @@
 # ──────────────────────────────────────────────────────────────────────────────
 set -euo pipefail
 
-REPO="/home/kevin786/Workspace/Project/Isaac_sim"
+REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"   # repo root (this script lives in scripts/)
 cd "$REPO"
 
 TAG="${TAG:-franka_push_v2_dr12}"
diff --git a/scripts/recollect_retrain_barrier.sh b/scripts/recollect_retrain_barrier.sh
index f9f8078..c6f670e 100755
--- a/scripts/recollect_retrain_barrier.sh
+++ b/scripts/recollect_retrain_barrier.sh
@@ -25,7 +25,7 @@
 # ──────────────────────────────────────────────────────────────────────────────
 set -euo pipefail
 
-REPO="/home/kevin786/Workspace/Project/Isaac_sim"
+REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"   # repo root (this script lives in scripts/)
 cd "$REPO"
 
 TAG="${TAG:-franka_barrier_v2}"
diff --git a/scripts/rl/play_push.py b/scripts/rl/play_push.py
index b06b61b..d2a4d4a 100644
--- a/scripts/rl/play_push.py
+++ b/scripts/rl/play_push.py
@@ -36,7 +36,6 @@
 from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper, handle_deprecated_rsl_rl_cfg
 
 from envs.tasks.push.rl.ppo_cfg import PushPPORunnerCfg
-import envs.tasks.push.rl.mdp as pmdp
 
 
 def _latest_checkpoint() -> str:
diff --git a/tools/checks/hdf5_data.py b/tools/checks/hdf5_data.py
index 7de74cb..0abe2f9 100644
--- a/tools/checks/hdf5_data.py
+++ b/tools/checks/hdf5_data.py
@@ -23,7 +23,7 @@
         # ── structured obs ────────────────────────────────────────────
         if "obs" in demo:
             obs = demo["obs"]
-            print(f"    obs/")
+            print("    obs/")
             for obs_key in sorted(obs.keys()):
                 item = obs[obs_key]
                 print(f"      {obs_key:<16}: {item.shape}  {item.dtype}")
@@ -56,7 +56,7 @@
                 status = "✅" if np.abs(tq).max() > 1e-3 else "⚠️ 全零"
                 print(f"    joint_torque      : |τ|max={np.abs(tq).max():.3f}  {status}")
         else:
-            print(f"    obs: ❌ 缺少")
+            print("    obs: ❌ 缺少")
 
         # ── states ────────────────────────────────────────────────────
         if "states" in demo:
diff --git a/tools/viz/push_camera_sensitivity.py b/tools/viz/push_camera_sensitivity.py
index e11eacc..675ba39 100644
--- a/tools/viz/push_camera_sensitivity.py
+++ b/tools/viz/push_camera_sensitivity.py
@@ -6,7 +6,7 @@
 # Usage:
 #   python tools/viz/push_camera_sensitivity.py --model_path <ckpt>/pretrained_model \
 #       --hdf5 _out/datasets/franka_push_rl_demos_clean/dataset.hdf5 --out <png>
-import argparse, glob
+import argparse
 import numpy as np
 import torch