diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2b22236 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + workflow_dispatch: + +jobs: + lint: + name: ruff (lint) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: pip install ruff + - run: ruff check . + + reproduce-analysis: + name: no-sim failure analysis (reproducibility smoke) + runs-on: ubuntu-latest + # ACT inference on a CPU runner is heavy; run on demand rather than on every push. + if: github.event_name == 'workflow_dispatch' + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install (lerobot + hub extras) + run: pip install -e ".[lerobot,hub]" "lerobot==0.5.2" + - name: Fetch minimal assets from the Hugging Face Hub + run: bash scripts/bootstrap_assets.sh --minimal + - name: Reproduce the analysis (no simulator) + run: bash experiments/act_push_failure/run_all.sh + - name: Assert the wrist-shortcut diagnostic holds + run: | + python - <<'PY' + import json + s = json.load(open("experiments/act_push_failure/results/push_summary.json")) + a = s["E2_camera_ablation"] + wrist, overhead = a["delta_black_wrist_only"], a["delta_black_overhead_only"] + print(f"black-wrist Δ={wrist:.3f} black-overhead Δ={overhead:.3f}") + assert wrist > overhead, "expected push to rely on the wrist camera (wrist Δ > overhead Δ)" + print("OK: push policy is wrist-reliant, as diagnosed.") + PY diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..cdd4b6d --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,28 @@ +cff-version: 1.2.0 +message: "If you use sim2act in your work, please cite it." 
+title: "sim2act: a VLA simulation data engine" +abstract: >- + An end-to-end NVIDIA Isaac Lab pipeline for collecting multimodal Franka manipulation + demonstrations (a Warp state-machine oracle and a PPO RL teacher), converting them to the + LeRobot v3.0 format, training Action Chunking Transformer (ACT) policies, and evaluating them + closed-loop — together with a reproducible, simulator-free failure-analysis case study of a + camera-reliance shortcut in an imitation policy. +type: software +authors: + - family-names: Ma + given-names: Kevin +license: Apache-2.0 +repository-code: "https://github.com/Kevinma0215/sim2act" +url: "https://github.com/Kevinma0215/sim2act" +version: "0.1.0" +date-released: "2026-06-19" +keywords: + - vision-language-action + - imitation-learning + - robot-learning + - action-chunking-transformer + - isaac-lab + - lerobot + - sim-to-real + - domain-randomization + - covariate-shift diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..47df74f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,77 @@ +# CLAUDE.md — sim2act + +> Working notes for AI agents on this repo. Read before editing or running anything. +> This file is the single source of truth for commands, conventions, and verified numbers — +> prefer it over re-deriving facts from the code. + +## What this is + +**sim2act** — a VLA (vision-language-action) **simulation data engine** on NVIDIA Isaac Lab. A Franka +arm performs pick / barrier / push; the repo takes each task from privileged oracle → multimodal demo +collection → LeRobot v3.0 → ACT imitation learning → a closed-loop eval harness. Positioning is +three-in-one: **data engine** (umbrella) + **rigorous failure diagnosis** + **end-to-end closed loop**. +Audience: foundation-model robotics engineers. + +Origin: built as an **R&S (Robotics & Simulation) take-home challenge**, now generalized into a public +project. 
**R&S = Robotics & Simulation — NOT "Rohde & Schwarz".** The old submission codename was +"Corvinus" and is being retired (only `docs/archive/` may still mention it). + +## Conda environments (CRITICAL — there are two; do not mix them) + +Isaac Lab is pip-installed into the `isaaclab` conda env, so run stages with **plain `python`** +(NOT `./isaaclab.sh -p`, despite older README text). Prefer `conda run -n <env> python ...`. + +| env | key versions | use for | +|---|---|---| +| **isaaclab** (py3.11) | isaacsim 5.1.0, isaaclab 0.54.4, lerobot **0.4.4**, torch 2.7, warp 1.14 | everything touching the simulator: collect, PPO RL train, LeRobot convert, eval — and the push-fix pipeline's ACT training (`fix_push_widen_dr.sh` runs end-to-end in this env) | +| **lerobot** (py3.12) | lerobot **0.5.2**, torch 2.11, no Isaac Sim | the **no-sim** failure analysis `experiments/act_push_failure/run_all.sh` (requires 0.5.2) | + +Gotcha: the two envs ship different lerobot versions (0.4.4 vs 0.5.2); existing ACT checkpoints load +under both. Keep each pipeline inside ONE env: run the whole `fix_push_widen_dr.sh` in `isaaclab`; +run the no-sim ablation in `lerobot`. + +Hardware here: 1× RTX 5060 Ti (16 GB). PPO default is 4096 envs — may need fewer on 16 GB; SPEEDRUN uses 256.
+ +## Canonical commands (run from repo root) + +- Editable install: `conda run -n isaaclab python -m pip install -e .` +- Collect (SM oracle): `python scripts/collect/demos.py --task pick_place|barrier --num_demos 50 --headless --enable_cameras` +- Push RL chain: `python scripts/train/push_rl.py --headless --num_envs 4096` → `python scripts/rl/export_push.py --headless` → `python scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras` +- Convert → LeRobot: `python data/convert_to_lerobot.py --input _out/datasets/_official_demos/dataset.hdf5 --output _out/datasets/lerobot/ --state_keys joint_pos,joint_vel --no_depth` +- Train ACT: `python scripts/train/act.py --dataset _out/datasets/lerobot/ --steps 40000 --batch-size 8 [--wandb]` +- Eval: `python scripts/eval/policy.py --policy act|oracle|dummy --task <task> --model_path <checkpoint_dir>/pretrained_model --num_rollouts 20 --headless --enable_cameras` (extras: `--ablate_camera overhead|wrist`, `--n_action_steps`, `--init_scale`, `--oracle-pose gt|noisy`) +- No-sim push failure analysis: `conda run -n lerobot bash experiments/act_push_failure/run_all.sh` +- Push fix (whole chain): `conda run -n isaaclab bash scripts/fix_push_widen_dr.sh` (run `SPEEDRUN=1` first). DR is set via `PUSH_BOX_DR` and shared by train/collect/eval. + +## Outputs: everything generated lives under `_out/` (gitignored) + +`_out/datasets/{_official_demos/dataset.hdf5 (raw HDF5), lerobot/ (LeRobot v3.0)}`, +`_out/rl/franka_push/<run>/`, `_out/act/act_<task>_run_<id>/checkpoints/{<step>,last}/pretrained_model`, +`_out/eval/*.json`, `_out/viz/`. +The one generated thing that IS committed: `experiments/act_push_failure/results/` (analysis evidence — +deliberately gitignore-excepted). + +## Verified numbers (cite verbatim; do not re-derive) + +From `experiments/act_push_failure/results/*_summary.json`: +- push teacher-forcing EE-xy L1 = **0.011 m** (proves the model learned the demos).
+- push camera ablation: black-**wrist** Δ = **0.197** vs black-**overhead** Δ = **0.038** → wrist shortcut. +- barrier ablation: black-overhead Δ = **0.089** vs black-wrist Δ = **0.027** → robust overhead. +- barrier ACT **90%** in-dist vs SM oracle **75%**; OOD at init_scale 1.5 → **55%**. push ACT **0%** (pre-fix). +- Root cause: push init DR ±3 cm (vs barrier ±13/±7 cm) → static overhead uninformative → policy takes the + wrist shortcut → closed-loop covariate-shift spiral. + +## Don't + +- Don't commit `_out/` or large media (`.gif/.webm/.mp4`) into git history (host on HF Hub / GitHub releases). +- Don't rename the Python modules (`envs`/`eval`/`data`/`controllers`) — only the distribution name is `sim2act`. +- Don't call it "Rohde & Schwarz". R&S = Robotics & Simulation. +- Don't advertise OpenVLA / Octo / π0 as done — `OpenVLAWrapper` is wired but unvalidated (an extension point). +- Don't hardcode `/home/kevin786/...` — use the `BASH_SOURCE` repo-root pattern (see `scripts/*.sh`). +- Don't launch the heavy push fix without a `SPEEDRUN=1` smoke first. + +## Layout + +`envs/` (base/tasks/scenes cfg) · `controllers/` (Warp GPU state machine) · `scripts/{collect,train,eval,rl,viz}` +· `eval/` (VLA eval harness) · `data/convert_to_lerobot.py` · `tools/{checks,smoke,viz}` · +`experiments/act_push_failure/` (flagship failure analysis, no-sim) · `docs/` · `_out/` (generated, gitignored). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ef1f090 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,41 @@ +# Contributing to sim2act + +Thanks for your interest. sim2act is a research codebase; contributions that improve +reproducibility, add tasks/policies, or sharpen the analysis are very welcome. + +## Environments + +Two conda envs are used (full matrix in [CLAUDE.md](CLAUDE.md)): + +- **`isaaclab`** — anything that touches the simulator: demo collection, PPO RL training, LeRobot + conversion, and closed-loop eval. 
Isaac Lab is pip-installed into this env, so run scripts with + plain `python` (not `./isaaclab.sh -p`). +- **`lerobot`** — the simulator-free failure analysis (`lerobot==0.5.2`). + +Install the package editable: + +```bash +conda run -n isaaclab python -m pip install -e ".[hub]" +``` + +## Sanity check without a GPU or simulator + +The flagship failure analysis reproduces in ~5 minutes from a published dataset + checkpoint, no +Isaac Sim required: + +```bash +conda activate lerobot +bash scripts/bootstrap_assets.sh --minimal # pulls the dataset + checkpoint from the HF Hub +bash experiments/act_push_failure/run_all.sh +``` + +## Style + +- Python is linted with [ruff](https://docs.astral.sh/ruff/): `ruff check . && ruff format --check .` +- Keep generated artifacts out of git — everything lands under `_out/` (gitignored). +- Don't hardcode absolute paths; shell scripts resolve the repo root via `BASH_SOURCE`. + +## Pull requests + +Keep PRs focused and clearly described. If a change affects the pipeline, say which stage(s) and +which conda env you validated it in. CI runs `ruff check` on every push to `main` and on every pull request; the no-simulator reproducibility smoke runs only on demand (manual `workflow_dispatch`). diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..53da2d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2026 Kevin Ma + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 7510b12..4b43ea9 100644 --- a/README.md +++ b/README.md @@ -1,317 +1,199 @@ -# Isaac_sim — Franka VLA Pipeline(pick / barrier / push) +# sim2act — a VLA simulation data engine -基於 **IsaacLab** 建立的 Franka 操作示範收集與 VLA 訓練資料管線。 +[![CI](https://github.com/Kevinma0215/sim2act/actions/workflows/ci.yml/badge.svg)](https://github.com/Kevinma0215/sim2act/actions/workflows/ci.yml) +[![License: Apache-2.0](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) +[![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](pyproject.toml) +[![Isaac Sim 5.1](https://img.shields.io/badge/Isaac%20Sim-5.1-76B900.svg)](https://developer.nvidia.com/isaac/sim) +[![LeRobot v3.0](https://img.shields.io/badge/LeRobot-v3.0-orange.svg)](https://github.com/huggingface/lerobot) -> **目標**:Env 設定 → 示範收集(SM oracle / RL oracle)→ LeRobot v3.0 格式 → ACT 訓練 → VLA Eval Harness +**From simulation to action.** sim2act manufactures Vision-Language-Action (VLA) training data in +NVIDIA Isaac Lab — and stress-tests the policies it produces. ---- +🇹🇼 中文版:[README.zh-Hant.md](README.zh-Hant.md) -## Challenge 對應(R&S Round 2) +

+ ACT policy picking a block over a barrier
+ A learned ACT policy clearing the barrier task — trained entirely from simulated oracle demos. +

-本專案為 R&S Round-2 挑戰(`R&S Challenge.md`)的實作;書面報告見 -[`Project-Corvinus-Report.pdf`](Project-Corvinus-Report.pdf)。 +## TL;DR -| Goal | 內容 | 本專案實作 | 狀態 | -|------|------|-----------|:---:| -| 1 | 桌上放一個 box | `scripts/viz/two_box_table.py`(`envs/scenes/two_box_table_env.py`)| ✅ | -| 2 | 第二個 box | 同上(場景含兩顆 box)| ✅ | -| 3 | 用 box1 把 box2 推向桌角 | push RL oracle(`envs/tasks/push/`,`scripts/train/push_rl.py`)| ✅ oracle;ACT 已診斷待修復 | -| 4 | 1/3 臂高屏障,pick 越障到另一側 | barrier SM(`envs/tasks/barrier/`,調高 lift/approach)| ✅ | -| 5 | 收集可實際使用的感測資料 | RGB-D + wrist RGB + joint-state + 指尖 contact(`envs/base/`)| ✅(IMU 經分析判為冗餘移除)| +sim2act takes three contact-rich Franka manipulation tasks — pick, pick-over-barrier, and +non-prehensile push — through a complete data flywheel: ---- +> **scene → privileged oracle (Warp GPU state machine + a 4096-env PPO teacher) → multimodal demo +> collection → LeRobot v3.0 → ACT imitation learning → a closed-loop eval harness** -## Pipeline 概覽 +one command per stage, fully reproducible. Two results carry the project: -``` -[ pick_place / barrier ] [ push ] -FrankaBaseEnvCfg + pick_place_cube_sm.py PPO push teacher(RL oracle) -(overhead + wrist camera, Warp SM) scripts/train/push_rl.py → rl/export_push.py - │ │ - ▼ ▼ - scripts/collect/demos.py scripts/collect/push_rl_demos.py - (--task pick_place|barrier) (RL policy rollout) - │ │ - └────────────────────┬─────────────────────────┘ - ▼ - _out/datasets/franka__demos/dataset.hdf5 - (actions + obs + rgb_overhead/wrist + depth + contact) - │ - ▼ - data/convert_to_lerobot.py → LeRobot v3.0(parquet + mp4) - │ - ┌────────────┴─────────────┐ - ▼ ▼ - scripts/train/act.py eval/ + scripts/eval/policy.py - (ACT,IL) ──────▶ (VLA rollout → _out/eval/eval_*.json) -``` - ---- - -## 目錄結構 +1. a student ACT policy that **beats its privileged oracle on the barrier task (90% vs 75%)**, with a + measured out-of-distribution generalization curve; and +2. 
a **rigorous, simulator-free root-cause diagnosis of a 0%-success push policy** — traced to a + camera-shortcut induced by under-randomized initial states — plus the **before→after fix** that + targets it. -``` -Isaac_sim/ -├── pyproject.toml # editable 套件定義(移除 sys.path hack 的關鍵) -│ -├── envs/ # 環境 config(base / tasks / scenes 分層) -│ ├── base/ # 共用基底(繼承鏈,每層只負責一件事) -│ │ ├── franka_base_env.py # FrankaSensorEnvCfg (+ overhead RGB-D / wrist RGB) -│ │ │ # └ FrankaPickVLAEvalEnvCfg (評估分支,無 recorder) -│ │ │ # FrankaBaseEnvCfg (+ contact 感測 + RecorderManager) -│ │ │ # └ FrankaPickVLAEnvCfg (pick-and-place) -│ │ └── recorders.py # PreStepStructuredObsRecorder → 結構化 obs/ -│ │ # joint_pos/vel/torque, object_pos/quat, -│ │ # rgb_overhead/wrist, depth_overhead, contact_l/rfinger -│ ├── tasks/ # 各任務(繼承 base,只加自己的場景/事件) -│ │ ├── push/ # push 走 RL oracle(SM push 已移除) -│ │ │ ├── __init__.py # PUSH_CORNER 桌角目標常數(RL reward/eval 共用) -│ │ │ ├── eval_env.py # FrankaPushVLAEvalEnvCfg(ACT eval) -│ │ │ └── rl/ # push RL oracle 子系統(PPO teacher) -│ │ │ ├── action.py # PlanarPushActionCfg (x,y,yaw) -│ │ │ ├── mdp.py # push 專用 obs/reward 函式 -│ │ │ ├── train_env.py # FrankaPushRLEnvCfg(4096 env,無相機;box DR 由 PUSH_BOX_DR 控制) -│ │ │ ├── collect_env.py # FrankaPushRLCollectEnvCfg(用 policy 收 demo) -│ │ │ └── ppo_cfg.py # PushPPORunnerCfg(rsl_rl 超參) -│ │ └── barrier/ -│ │ ├── collect_env.py # FrankaBarrierEnvCfg + apply_barrier_scene(Goal 4) -│ │ └── eval_env.py # FrankaBarrierVLAEvalEnvCfg(ACT eval) -│ └── scenes/ -│ └── two_box_table_env.py # TwoBoxTableSceneCfg:純場景(桌 + 兩方塊,無手臂,Goal 1/2) -│ -├── controllers/ # Warp GPU state machine(機器人無關 kernel) -│ └── pick_place_cube_sm.py # PickAndPlaceSm:REST→…→GRASP→LIFT→…→RETREAT→DONE -│ # (pick + barrier 共用;push 走 RL,無 SM) -│ -├── scripts/ # pipeline entrypoints(依功能分子資料夾) -│ ├── collect/ # 收集 demo -│ │ ├── demos.py # SM oracle(--task pick_place|barrier)→ HDF5 -│ │ └── push_rl_demos.py # RL oracle(push)→ HDF5 -│ ├── train/ # 訓練 -│ │ ├── act.py # ACT(IL,wrap 
lerobot-train) -│ │ └── push_rl.py # PPO(push RL teacher,rsl_rl) -│ ├── eval/ # VLA 評估 -│ │ ├── policy.py # 評估入口(--policy dummy|act|openvla|oracle) -│ │ ├── oracle_noise_sweep.py # oracle 位姿噪聲掃描(去 privilege 敏感度) -│ │ └── record_demos.sh # 一鍵錄製 in-dist / OOD / oracle 對照影片 -│ ├── rl/ # push RL 輔助 -│ │ ├── export_push.py # checkpoint → policy.pt(jit) -│ │ └── play_push.py # 播放/視覺化訓好的 policy(量成功率) -│ ├── viz/ # 場景視覺化 debug(不錄製) -│ │ ├── scripted_pick_place.py -│ │ └── two_box_table.py # two-box 場景(Goal 1/2) -│ ├── fix_push_widen_dr.sh # push 修復 pipeline:加大 DR→重訓 PPO→重收→轉檔→重訓 ACT -│ └── recollect_retrain_barrier.sh # barrier 乾淨重收(無 marker)+ 重訓 ACT -│ -├── tools/ # 開發/驗證工具(依功能分子資料夾) -│ ├── checks/ # 健全性/資料檢查(env / hdf5_data / push_demos) -│ ├── smoke/ # 煙霧測試(planar_push / push_rl_env) -│ ├── viz/ # 分析/視覺化(多數純資料集、不需模擬器) -│ │ ├── eval_ood.py # OOD 泛化報告(ACT vs oracle,分區成功率) -│ │ ├── barrier_init_map.py # barrier 初始分佈 × 成功/失敗 俯視圖 -│ │ ├── push_camera_sensitivity.py # ACT push 各相機敏感度(ablation bar) -│ │ ├── push_training_curve.py # PPO push teacher 訓練曲線(success / reward) -│ │ └── pcd.py / push_demos.py # 點雲 / push demo 視覺化 -│ └── migrate_outputs.sh # 一次性:舊輸出 → _out/(dry-run 預設) -│ -├── eval/ # VLA Eval Harness(可安裝套件) -│ ├── vla_wrapper.py # VLAWrapper (ABC) / Dummy / ACTLocal / OpenVLA / SMOracle -│ ├── obs_adapter.py # Isaac Lab scene state → VLA 輸入 dict(build_obs_dict) -│ ├── eval_runner.py # EvalRunner:多環境 rollout + success latch -│ └── video_recorder.py # 三相機(overhead/wrist/side)逐 episode 錄影 -│ -├── data/ # 資料格式轉換 -│ └── convert_to_lerobot.py # HDF5 → LeRobot v3.0(parquet + mp4) -│ -├── experiments/ # 非 MVP 探索(可復現分析,不需模擬器) -│ └── act_push_failure/ # ACT push 失效根因分析(相機依賴 ablation) -│ ├── act_camera_ablation.py # E1 teacher-forcing + E2 逐相機 ablation -│ ├── run_all.sh # 一鍵復現(push + barrier 對照 + 影像) -│ ├── README.md # 分析說明(實驗 ↔ 推論 ↔ 證據) -│ ├── REPORT_SECTION.md # 可直接貼進報告的段落草稿 -│ └── results/ # 證據(log / json / 訓練影像) -│ -└── _out/ # ★所有 generated 產物統一根(.gitignore) - ├── 
datasets/ # franka__demos/dataset.hdf5(raw)+ lerobot/(LeRobot v3.0) - ├── eval/ # eval_*.json 評估結果 - ├── rl/ # rsl_rl 訓練 log + policy.pt / model_*.pt - ├── act/ # ACT checkpoints - ├── viz/ # demo gif/影片(含 demos/) - └── debug/ # 相機截圖 / 點雲 -``` -> 舊散落輸出(`datasets/` `results/` `logs/` `outputs/` `viz_demos/` `debug_images/`)整合進 `_out/`; -> 既有資料用 `bash tools/migrate_outputs.sh --run` 搬移(預設 dry-run 先預覽)。 +The diagnosis reproduces in **~5 minutes on a published dataset + checkpoint, no simulator required**. -> Goal 5 感測決策(D4):**IMU 已移除**(固定基座下,hand 的線加速度/角速度可由 joint-state -> + FK 完全推得,屬冗餘)。保留的感測 = overhead RGB-D + wrist RGB + joint-state + gripper + 指尖 contact。 +The design optimizes for what foundation-model robotics actually weighs: data infrastructure at +scale, empirical rigor, honest failure analysis, and reproducibility. It started as a *Robotics & Sim* +take-home challenge whose thesis was *"the challenge is not the quantity of the data, but the +quality"* (provenance in [docs/archive/](docs/archive/)). ---- +## Headline results -## 快速上手 +| Task | Oracle | Oracle SR | ACT (in-dist) | ACT (OOD ×1.5) | Learned camera | Status | +|---|---|:---:|:---:|:---:|---|:---:| +| `pick_place` | Warp state machine | — | — | — | overhead | demos ✅ | +| `barrier` | Warp state machine | 75% | **90%** | 55% | overhead (robust) | ✅ | +| `push` | PPO teacher (4096 env, ~98.5% train) | — | **0%** → _(pending fix run)_ | — | wrist (fragile) → overhead | 🔬 diagnosed + fix in progress | -### 前置需求 +The student **beats the teacher** on barrier (90% > 75%), and the push failure is diagnosed down to a +single causal lever — initial-state randomization width — with controlled camera ablations. Full +numbers and methodology: [docs/results.md](docs/results.md). -1. Isaac Lab(需先完成安裝,使用 `./isaaclab.sh`)。 -2. **一次性:把本專案裝成 editable 套件**(這樣各 entrypoint 不需 `sys.path` hack 即可 `from envs...`): +## Architecture -```bash -./isaaclab.sh -p -m pip install -e . 
-# 轉 LeRobot 的相依(通常在獨立 lerobot env):./isaaclab.sh -p -m pip install -e ".[lerobot]" -# 點雲視覺化:./isaaclab.sh -p -m pip install -e ".[viz]" ``` - -### Step 1:收集示範 - -```bash -# 收集 50 個成功 demo(headless + 開相機) -./isaaclab.sh -p scripts/collect/demos.py --task pick_place --num_demos 50 --headless --enable_cameras - -# 快速測試(只收 3 個) -./isaaclab.sh -p scripts/collect/demos.py --num_demos 3 --headless --enable_cameras + ┌──────────────── privileged oracle ────────────────┐ + scene │ Warp GPU state machine (pick / barrier) │ + (Franka + ─────▶ │ PPO RL teacher, 4096 envs (push) │ + two boxes) └───────────────────────┬───────────────────────────┘ + ▼ + multimodal demo collection + overhead RGB-D + wrist RGB + joint pos/vel/torque + + fingertip contact forces · 8-D action + │ + ▼ + raw HDF5 ──▶ LeRobot v3.0 (parquet + mp4) + │ + ┌───────────────┴────────────────┐ + ▼ ▼ + ACT imitation learning closed-loop eval harness + (chunked actions) VLAWrapper · success-latch + camera ablation · OOD sweep ``` -**輸出** → `_out/datasets/franka_pick_demos/dataset.hdf5` +

+ state-machine pick + PPO push teacher + ACT barrier success
+ Left → right: Warp state-machine pick · PPO push teacher · learned ACT barrier policy. +

-| 資料欄位 | Shape | -|---------|-------| -| `actions` | `(T, 8)` arm(7) + gripper(1) | -| `obs/joint_pos` / `joint_vel` / `joint_torque` | `(T, 9)` | -| `obs/object_pos` | `(T, 3)` env-local frame | -| `obs/object_quat` | `(T, 4)` wxyz | -| `obs/rgb_overhead` / `rgb_wrist` | `(T, 224, 224, 3)` uint8 | -| `obs/depth_overhead` | `(T, 224, 224, 1)` float32 公尺 | -| `obs/contact_lfinger` / `contact_rfinger` | `(T, 3)` 淨接觸力 (N) | +- **Multimodal observation** per step: overhead RGB-D (224²) + wrist RGB (224²) + joint + position/velocity/torque + dual fingertip contact forces. **Action:** 8-D (7-D IK-absolute + end-effector pose + gripper) at ~50 Hz. +- **Two oracles, by design.** A deterministic Warp state machine + ([docs/state-machine.md](docs/state-machine.md)) drives the prehensile pick / barrier tasks; a + learned PPO teacher (4096 parallel envs, ~98.5% training success) drives the contact-rich push, + which a hand-written controller handles poorly. -### Step 2:轉換為 LeRobot v3.0 格式 +Deep dive: [docs/architecture.md](docs/architecture.md). -```bash -python data/convert_to_lerobot.py \ - --input _out/datasets/franka_pick_demos/dataset.hdf5 \ - --output _out/datasets/lerobot/franka_pick_place -# fps 預設讀 HDF5 的 fps attr(collect 寫入=實際控制頻率 50); -# 舊資料集無此 attr → fallback 30 並警告,請改用新版 collect 重收,或手動 --fps 50。 - -# 不編碼影片(更快,不需要 imageio) -python data/convert_to_lerobot.py \ - --input _out/datasets/franka_pick_demos/dataset.hdf5 \ - --output _out/datasets/lerobot/franka_pick_place \ - --no_video -``` +## Signature case study — diagnosing a 0% push policy -### Step 3:訓練 ACT(IL) +The same pipeline that yields **90% on barrier** yields **0% on push**: the arm ignores the first box +and drives diagonally toward the corner from the first step. -ACT 在獨立的 lerobot conda env 訓練(`scripts/train/act.py` 是 `lerobot-train` 的薄 wrapper): +

+ push policy failing + push policy with wrist camera blacked out
+ Left: the push policy failing. Right: the same scene with the wrist camera blacked out — + the actions shift far more than with the overhead camera blacked out, exposing the reliance on the wrist view. +

-```bash -conda activate lerobot -# 小量驗證 pipeline(少量 steps、不開 wandb) -python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier_v0 --smoke +Per-camera ablation (mean action change when one camera is zeroed): -# 正式訓練 + wandb -python scripts/train/act.py \ - --dataset _out/datasets/lerobot/franka_barrier_v0 \ - --steps 100000 --batch-size 16 --wandb --wandb-project franka-vla -``` - -**輸出** → `_out/act//checkpoints/last/pretrained_model` +| ablation | push | barrier | +|---|:---:|:---:| +| black **overhead** Δ | 0.038 | **0.089** | +| black **wrist** Δ | **0.197** | 0.027 | -### Step 4:VLA Eval Harness +Push leans on the fragile, ego-motion-coupled **wrist** camera; barrier leans on the robust static +**overhead** camera. Teacher-forcing replay (EE-xy L1 = **0.011 m**) proves the model *did* learn the +demonstrations — so this is a closed-loop **covariate-shift** failure, not under-training. Root cause: +push initial-state randomization is only **±3 cm** (vs barrier **±13/±7 cm**), which makes the static +overhead view nearly invariant and uninformative, pushing the policy onto the wrist shortcut. The fix +widens the randomization ([`scripts/fix_push_widen_dr.sh`](scripts/fix_push_widen_dr.sh), via +`PUSH_BOX_DR`); the before→after comparison is in progress. 
-`eval/`(VLAWrapper / obs_adapter / EvalRunner)+ `scripts/eval/policy.py` 提供多環境 -rollout 評估,結果輸出到 `_out/eval/eval_YYYY-MM-DD_HH-MM-SS.json`。 +**Reproduce in ~5 minutes, no simulator:** ```bash -# Phase 1:用隨機 dummy policy 驗證 pipeline -./isaaclab.sh -p scripts/eval/policy.py --policy dummy --num_rollouts 20 --headless --enable_cameras - -# 評估訓好的 ACT(in-process;需 eval env 裝 lerobot) -./isaaclab.sh -p scripts/eval/policy.py --policy act --task barrier \ - --model_path _out/act//checkpoints/last/pretrained_model \ - --num_rollouts 20 --headless --enable_cameras -# 進階:--n_action_steps 25 提高 replan 頻率;--ablate_camera overhead|wrist 量相機貢獻 - -# SM oracle baseline 對照(pick/barrier):--oracle-pose gt|noisy 比較 privileged vs 去 privilege -./isaaclab.sh -p scripts/eval/policy.py --policy oracle --task barrier --oracle-pose gt \ - --num_rollouts 20 --headless --enable_cameras - -# 評估 fine-tuned OpenVLA -./isaaclab.sh -p scripts/eval/policy.py --policy openvla \ - --model_path /path/to/finetuned-openvla --unnorm_key franka_pick \ - --num_rollouts 50 --num_envs 4 --headless --enable_cameras +conda activate lerobot +bash scripts/bootstrap_assets.sh --minimal # pull the dataset + checkpoint from the HF Hub +bash experiments/act_push_failure/run_all.sh # regenerates the ablation evidence ``` -### (選用)Barrier / two-box 場景 - -```bash -# Goal 4:把方塊抓起越過屏障放到另一側,收集 barrier 示範 -./isaaclab.sh -p scripts/collect/demos.py --task barrier --num_demos 50 --headless --enable_cameras - -# Goal 1/2:純場景 demo(桌上兩顆方塊,無手臂) -./isaaclab.sh -p scripts/viz/two_box_table.py -``` +Full write-up: [docs/case-study-push.md](docs/case-study-push.md). 
-> **Barrier 越障原理**:屏障是 y=0 的一道靜態牆,高度 ≈ 1/3 手臂高(`BARRIER_HEIGHT`)。 -> 越障靠的是把共用 `PickAndPlaceSm` 的 `lift_height` / `approach_height` 調高 -> (見 `envs/tasks/barrier/collect_env.py` 常數),讓方塊在高處水平橫越屏障,之後才在另一側 -> 垂直下降放置——不需另寫狀態機。屏障變高時,請一併調高這兩個高度以維持越障淨空。 +## Quickstart -### (選用)Goal 3:Push(RL oracle 路徑) +### 1 · Reproduce the failure analysis (no GPU / simulator, ~5 min) -push 不走 SM,而是先用 PPO 訓一個 push teacher、匯出成 TorchScript 後當 oracle 收 demo。 -box 初始隨機化(DR)由 `PUSH_BOX_DR` 環境變數控制,train / collect / eval 共用同一值: +See the case-study block above — it runs entirely in the `lerobot` conda env on a published dataset + +checkpoint. -```bash -# ① 訓練 PPO push teacher(headless,多 env) -./isaaclab.sh -p scripts/train/push_rl.py --headless --num_envs 4096 -# tensorboard --logdir _out/rl/franka_push +### 2 · Run the full pipeline (requires Isaac Lab) -# ② 匯出最新 checkpoint → policy.pt(jit) -./isaaclab.sh -p scripts/rl/export_push.py --headless +sim2act uses two conda envs (full matrix in [CLAUDE.md](CLAUDE.md)): -# (選用)視覺化 / 量成功率 -./isaaclab.sh -p scripts/rl/play_push.py --num_envs 16 +| env | used for | +|---|---| +| `isaaclab` (Isaac Sim 5.1) | demo collection · PPO RL · LeRobot conversion · eval | +| `lerobot` (lerobot 0.5.2) | the simulator-free failure analysis | -# ③ 用 RL oracle 收 push demo → HDF5 -./isaaclab.sh -p scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras +```bash +conda run -n isaaclab python -m pip install -e ".[hub]" # one-time editable install + +# collect → convert → train → eval (barrier shown; see docs/architecture.md for all tasks) +python scripts/collect/demos.py --task barrier --num_demos 100 --headless --enable_cameras +python data/convert_to_lerobot.py --input _out/datasets/franka_barrier_official_demos/dataset.hdf5 \ + --output _out/datasets/lerobot/franka_barrier --state_keys joint_pos,joint_vel --no_depth +python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier --steps 40000 +python scripts/eval/policy.py 
--policy act --task barrier \ + --model_path _out/act/<run>/checkpoints/last/pretrained_model \ + --num_rollouts 20 --headless --enable_cameras ``` -### 已知限制:ACT push 相機依賴(已診斷,修復待跑) +Push uses the RL-teacher chain (`scripts/train/push_rl.py` → `scripts/rl/export_push.py` → +`scripts/collect/push_rl_demos.py`); see [docs/architecture.md](docs/architecture.md). -push 的 ACT 會學成**幾乎只依賴隨手移動的 wrist 第一人稱相機、幾乎不用靜態 overhead**, -closed-loop 一偏離 demo 就 OOD → covariate shift(根因=box init DR 僅 ±3cm 過窄)。 -完整可復現分析見 [`experiments/act_push_failure/`](experiments/act_push_failure/) -(`README.md` / `REPORT_SECTION.md`,純資料集 + checkpoint、不需啟動 Isaac Sim): +## What's inside -```bash -conda activate lerobot -bash experiments/act_push_failure/run_all.sh -``` +| Task | Oracle | Primary camera | Status | +|---|---|---|---| +| `pick_place` | Warp state machine | overhead | demos ✅ | +| `barrier` (pick over a ⅓-arm-height wall) | Warp state machine | overhead (robust) | ACT 90% ✅ | +| `push` (non-prehensile, box→box→corner) | PPO teacher | wrist → overhead (after fix) | diagnosed, fix in progress 🔬 | -修復已包成一鍵 pipeline `scripts/fix_push_widen_dr.sh`(把 DR 加大到 ±10–13cm 對齊 barrier, -重訓 PPO→重收→轉檔→重訓 ACT;先 `SPEEDRUN=1` 驗整條通)——**尚未執行**。 +**Sensor suite** (collected for every demo): overhead RGB-D, wrist RGB, joint position/velocity/torque, +and left/right fingertip contact forces — chosen for realistic sim-to-real transfer. 
---- +## Repository layout -## State Machine 狀態說明 +``` +envs/ Isaac Lab env configs (base / tasks / scenes; clean inheritance chain) +controllers/ Warp GPU state machine (PickAndPlaceSm) +scripts/ pipeline entrypoints — collect / train / eval / rl / viz +eval/ VLA eval harness (VLAWrapper · EvalRunner · obs adapter · video) +data/ HDF5 → LeRobot v3.0 conversion +experiments/ act_push_failure/ — the flagship, simulator-free failure analysis +tools/ checks / smoke tests / analysis & visualization +docs/ architecture · case study · results · state machine · archive +_out/ all generated artifacts (gitignored; fetched via scripts/bootstrap_assets.sh) +``` -| State | 說明 | -|-------|------| -| `REST (0)` | 等待初始化(0.2s) | -| `APPROACH_ABOVE_OBJECT (1)` | 移至物件正上方 +0.1m | -| `APPROACH_OBJECT (2)` | 下降至物件位置 | -| `GRASP_OBJECT (3)` | 閉合夾爪(0.3s) | -| `LIFT_OBJECT (4)` | 抬起至 z=0.3m(越障時調高) | -| `MOVE_TO_PLACE (5)` | 移至放置目標正上方 | -| `PLACE (6)` | 下降至放置位置,開夾爪 | -| `LIFT_AFTER_PLACE (7)` | 垂直拉高(避免撞到方塊) | -| `RETREAT (8)` | 退回原點 (0.5, 0, 0.4) | -| `DONE (9)` | 保持位置,等待 env reset | +## Roadmap ---- +- Run the push DR-widening fix to completion and publish the before→after result. +- Validate additional policy backends — `OpenVLAWrapper` is wired in `eval/vla_wrapper.py` but + **not yet validated**; Octo / π0 are natural next wrappers. +- Attack covariate shift directly: DAgger / action-noise collection to cover off-trajectory views. -## 知識庫連結 +## Citation · License · Acknowledgements -- 實作細節(Obsidian):`2_DevOps-Tools/Isaac-Sim/07-FrankaPickVLA/` - (env-config / state-machine / demo-pipeline / lerobot-conversion / eval-harness / decision-log) -- 環境 config 繼承設計:`2_DevOps-Tools/Isaac-Sim/IsaacLab/`(env config 乾淨構築法) -- 專案框架與決策:`2_DevOps-Tools/Isaac-Sim/Project_Corvinus_Franka/` +If you use sim2act, please cite it (see [CITATION.cff](CITATION.cff)). Licensed under +**Apache-2.0** ([LICENSE](LICENSE)). 
Built on [Isaac Lab](https://github.com/isaac-sim/IsaacLab), +[LeRobot](https://github.com/huggingface/lerobot), [rsl_rl](https://github.com/leggedrobotics/rsl_rl), +and the [ACT](https://github.com/tonyzhaozh/act) architecture. diff --git a/README.zh-Hant.md b/README.zh-Hant.md new file mode 100644 index 0000000..4c889c7 --- /dev/null +++ b/README.zh-Hant.md @@ -0,0 +1,314 @@ +# sim2act — Franka VLA 模擬資料引擎(pick / barrier / push) + +> 🇬🇧 English: **[README.md](README.md)** | 本檔為繁體中文版(以英文版為準)。 + +基於 **IsaacLab** 建立的 Franka 操作示範收集與 VLA 訓練資料管線。 + +> **目標**:Env 設定 → 示範收集(SM oracle / RL oracle)→ LeRobot v3.0 格式 → ACT 訓練 → VLA Eval Harness + +--- + +## 緣起(Robotics & Sim 挑戰) + +本專案最初為 **Robotics & Sim(R&S)** 團隊的 Round-2 take-home 挑戰實作(核心命題:sim-to-real +VLA 資料「**重質不重量**」);原始題目與書面報告見 [`docs/archive/`](docs/archive/)。下表對應原始 Goals 1–5。 + +| Goal | 內容 | 本專案實作 | 狀態 | +|------|------|-----------|:---:| +| 1 | 桌上放一個 box | `scripts/viz/two_box_table.py`(`envs/scenes/two_box_table_env.py`)| ✅ | +| 2 | 第二個 box | 同上(場景含兩顆 box)| ✅ | +| 3 | 用 box1 把 box2 推向桌角 | push RL oracle(`envs/tasks/push/`,`scripts/train/push_rl.py`)| ✅ oracle;ACT 已診斷待修復 | +| 4 | 1/3 臂高屏障,pick 越障到另一側 | barrier SM(`envs/tasks/barrier/`,調高 lift/approach)| ✅ | +| 5 | 收集可實際使用的感測資料 | RGB-D + wrist RGB + joint-state + 指尖 contact(`envs/base/`)| ✅(IMU 經分析判為冗餘移除)| + +--- + +## Pipeline 概覽 + +``` +[ pick_place / barrier ] [ push ] +FrankaBaseEnvCfg + pick_place_cube_sm.py PPO push teacher(RL oracle) +(overhead + wrist camera, Warp SM) scripts/train/push_rl.py → rl/export_push.py + │ │ + ▼ ▼ + scripts/collect/demos.py scripts/collect/push_rl_demos.py + (--task pick_place|barrier) (RL policy rollout) + │ │ + └────────────────────┬─────────────────────────┘ + ▼ + _out/datasets/franka__demos/dataset.hdf5 + (actions + obs + rgb_overhead/wrist + depth + contact) + │ + ▼ + data/convert_to_lerobot.py → LeRobot v3.0(parquet + mp4) + │ + ┌────────────┴─────────────┐ + ▼ ▼ + scripts/train/act.py eval/ + scripts/eval/policy.py + 
(ACT,IL) ──────▶ (VLA rollout → _out/eval/eval_*.json) +``` + +--- + +## 目錄結構 + +``` +Isaac_sim/ +├── pyproject.toml # editable 套件定義(移除 sys.path hack 的關鍵) +│ +├── envs/ # 環境 config(base / tasks / scenes 分層) +│ ├── base/ # 共用基底(繼承鏈,每層只負責一件事) +│ │ ├── franka_base_env.py # FrankaSensorEnvCfg (+ overhead RGB-D / wrist RGB) +│ │ │ # └ FrankaPickVLAEvalEnvCfg (評估分支,無 recorder) +│ │ │ # FrankaBaseEnvCfg (+ contact 感測 + RecorderManager) +│ │ │ # └ FrankaPickVLAEnvCfg (pick-and-place) +│ │ └── recorders.py # PreStepStructuredObsRecorder → 結構化 obs/ +│ │ # joint_pos/vel/torque, object_pos/quat, +│ │ # rgb_overhead/wrist, depth_overhead, contact_l/rfinger +│ ├── tasks/ # 各任務(繼承 base,只加自己的場景/事件) +│ │ ├── push/ # push 走 RL oracle(SM push 已移除) +│ │ │ ├── __init__.py # PUSH_CORNER 桌角目標常數(RL reward/eval 共用) +│ │ │ ├── eval_env.py # FrankaPushVLAEvalEnvCfg(ACT eval) +│ │ │ └── rl/ # push RL oracle 子系統(PPO teacher) +│ │ │ ├── action.py # PlanarPushActionCfg (x,y,yaw) +│ │ │ ├── mdp.py # push 專用 obs/reward 函式 +│ │ │ ├── train_env.py # FrankaPushRLEnvCfg(4096 env,無相機;box DR 由 PUSH_BOX_DR 控制) +│ │ │ ├── collect_env.py # FrankaPushRLCollectEnvCfg(用 policy 收 demo) +│ │ │ └── ppo_cfg.py # PushPPORunnerCfg(rsl_rl 超參) +│ │ └── barrier/ +│ │ ├── collect_env.py # FrankaBarrierEnvCfg + apply_barrier_scene(Goal 4) +│ │ └── eval_env.py # FrankaBarrierVLAEvalEnvCfg(ACT eval) +│ └── scenes/ +│ └── two_box_table_env.py # TwoBoxTableSceneCfg:純場景(桌 + 兩方塊,無手臂,Goal 1/2) +│ +├── controllers/ # Warp GPU state machine(機器人無關 kernel) +│ └── pick_place_cube_sm.py # PickAndPlaceSm:REST→…→GRASP→LIFT→…→RETREAT→DONE +│ # (pick + barrier 共用;push 走 RL,無 SM) +│ +├── scripts/ # pipeline entrypoints(依功能分子資料夾) +│ ├── collect/ # 收集 demo +│ │ ├── demos.py # SM oracle(--task pick_place|barrier)→ HDF5 +│ │ └── push_rl_demos.py # RL oracle(push)→ HDF5 +│ ├── train/ # 訓練 +│ │ ├── act.py # ACT(IL,wrap lerobot-train) +│ │ └── push_rl.py # PPO(push RL teacher,rsl_rl) +│ ├── eval/ # VLA 評估 +│ │ ├── policy.py # 評估入口(--policy 
dummy|act|openvla|oracle) +│ │ ├── oracle_noise_sweep.py # oracle 位姿噪聲掃描(去 privilege 敏感度) +│ │ └── record_demos.sh # 一鍵錄製 in-dist / OOD / oracle 對照影片 +│ ├── rl/ # push RL 輔助 +│ │ ├── export_push.py # checkpoint → policy.pt(jit) +│ │ └── play_push.py # 播放/視覺化訓好的 policy(量成功率) +│ ├── viz/ # 場景視覺化 debug(不錄製) +│ │ ├── scripted_pick_place.py +│ │ └── two_box_table.py # two-box 場景(Goal 1/2) +│ ├── fix_push_widen_dr.sh # push 修復 pipeline:加大 DR→重訓 PPO→重收→轉檔→重訓 ACT +│ └── recollect_retrain_barrier.sh # barrier 乾淨重收(無 marker)+ 重訓 ACT +│ +├── tools/ # 開發/驗證工具(依功能分子資料夾) +│ ├── checks/ # 健全性/資料檢查(env / hdf5_data / push_demos) +│ ├── smoke/ # 煙霧測試(planar_push / push_rl_env) +│ ├── viz/ # 分析/視覺化(多數純資料集、不需模擬器) +│ │ ├── eval_ood.py # OOD 泛化報告(ACT vs oracle,分區成功率) +│ │ ├── barrier_init_map.py # barrier 初始分佈 × 成功/失敗 俯視圖 +│ │ ├── push_camera_sensitivity.py # ACT push 各相機敏感度(ablation bar) +│ │ ├── push_training_curve.py # PPO push teacher 訓練曲線(success / reward) +│ │ └── pcd.py / push_demos.py # 點雲 / push demo 視覺化 +│ └── migrate_outputs.sh # 一次性:舊輸出 → _out/(dry-run 預設) +│ +├── eval/ # VLA Eval Harness(可安裝套件) +│ ├── vla_wrapper.py # VLAWrapper (ABC) / Dummy / ACTLocal / OpenVLA / SMOracle +│ ├── obs_adapter.py # Isaac Lab scene state → VLA 輸入 dict(build_obs_dict) +│ ├── eval_runner.py # EvalRunner:多環境 rollout + success latch +│ └── video_recorder.py # 三相機(overhead/wrist/side)逐 episode 錄影 +│ +├── data/ # 資料格式轉換 +│ └── convert_to_lerobot.py # HDF5 → LeRobot v3.0(parquet + mp4) +│ +├── experiments/ # 非 MVP 探索(可復現分析,不需模擬器) +│ └── act_push_failure/ # ACT push 失效根因分析(相機依賴 ablation) +│ ├── act_camera_ablation.py # E1 teacher-forcing + E2 逐相機 ablation +│ ├── run_all.sh # 一鍵復現(push + barrier 對照 + 影像) +│ ├── README.md # 分析說明(實驗 ↔ 推論 ↔ 證據) +│ ├── REPORT_SECTION.md # 可直接貼進報告的段落草稿 +│ └── results/ # 證據(log / json / 訓練影像) +│ +└── _out/ # ★所有 generated 產物統一根(.gitignore) + ├── datasets/ # franka__demos/dataset.hdf5(raw)+ lerobot/(LeRobot v3.0) + ├── eval/ # eval_*.json 評估結果 + ├── rl/ # rsl_rl 訓練 log + 
policy.pt / model_*.pt + ├── act/ # ACT checkpoints + ├── viz/ # demo gif/影片(含 demos/) + └── debug/ # 相機截圖 / 點雲 +``` +> 舊散落輸出(`datasets/` `results/` `logs/` `outputs/` `viz_demos/` `debug_images/`)整合進 `_out/`; +> 既有資料用 `bash tools/migrate_outputs.sh --run` 搬移(預設 dry-run 先預覽)。 + +> Goal 5 感測決策(D4):**IMU 已移除**(固定基座下,hand 的線加速度/角速度可由 joint-state +> + FK 完全推得,屬冗餘)。保留的感測 = overhead RGB-D + wrist RGB + joint-state + gripper + 指尖 contact。 + +--- + +## 快速上手 + +### 前置需求 + +1. Isaac Lab(需先完成安裝,使用 `./isaaclab.sh`)。 +2. **一次性:把本專案裝成 editable 套件**(這樣各 entrypoint 不需 `sys.path` hack 即可 `from envs...`): + +```bash +./isaaclab.sh -p -m pip install -e . +# 轉 LeRobot 的相依(通常在獨立 lerobot env):./isaaclab.sh -p -m pip install -e ".[lerobot]" +# 點雲視覺化:./isaaclab.sh -p -m pip install -e ".[viz]" +``` + +### Step 1:收集示範 + +```bash +# 收集 50 個成功 demo(headless + 開相機) +./isaaclab.sh -p scripts/collect/demos.py --task pick_place --num_demos 50 --headless --enable_cameras + +# 快速測試(只收 3 個) +./isaaclab.sh -p scripts/collect/demos.py --num_demos 3 --headless --enable_cameras +``` + +**輸出** → `_out/datasets/franka_pick_demos/dataset.hdf5` + +| 資料欄位 | Shape | +|---------|-------| +| `actions` | `(T, 8)` arm(7) + gripper(1) | +| `obs/joint_pos` / `joint_vel` / `joint_torque` | `(T, 9)` | +| `obs/object_pos` | `(T, 3)` env-local frame | +| `obs/object_quat` | `(T, 4)` wxyz | +| `obs/rgb_overhead` / `rgb_wrist` | `(T, 224, 224, 3)` uint8 | +| `obs/depth_overhead` | `(T, 224, 224, 1)` float32 公尺 | +| `obs/contact_lfinger` / `contact_rfinger` | `(T, 3)` 淨接觸力 (N) | + +### Step 2:轉換為 LeRobot v3.0 格式 + +```bash +python data/convert_to_lerobot.py \ + --input _out/datasets/franka_pick_demos/dataset.hdf5 \ + --output _out/datasets/lerobot/franka_pick_place +# fps 預設讀 HDF5 的 fps attr(collect 寫入=實際控制頻率 50); +# 舊資料集無此 attr → fallback 30 並警告,請改用新版 collect 重收,或手動 --fps 50。 + +# 不編碼影片(更快,不需要 imageio) +python data/convert_to_lerobot.py \ + --input _out/datasets/franka_pick_demos/dataset.hdf5 \ + --output 
_out/datasets/lerobot/franka_pick_place \ + --no_video +``` + +### Step 3:訓練 ACT(IL) + +ACT 在獨立的 lerobot conda env 訓練(`scripts/train/act.py` 是 `lerobot-train` 的薄 wrapper): + +```bash +conda activate lerobot +# 小量驗證 pipeline(少量 steps、不開 wandb) +python scripts/train/act.py --dataset _out/datasets/lerobot/franka_barrier_v0 --smoke + +# 正式訓練 + wandb +python scripts/train/act.py \ + --dataset _out/datasets/lerobot/franka_barrier_v0 \ + --steps 100000 --batch-size 16 --wandb --wandb-project franka-vla +``` + +**輸出** → `_out/act//checkpoints/last/pretrained_model` + +### Step 4:VLA Eval Harness + +`eval/`(VLAWrapper / obs_adapter / EvalRunner)+ `scripts/eval/policy.py` 提供多環境 +rollout 評估,結果輸出到 `_out/eval/eval_YYYY-MM-DD_HH-MM-SS.json`。 + +```bash +# Phase 1:用隨機 dummy policy 驗證 pipeline +./isaaclab.sh -p scripts/eval/policy.py --policy dummy --num_rollouts 20 --headless --enable_cameras + +# 評估訓好的 ACT(in-process;需 eval env 裝 lerobot) +./isaaclab.sh -p scripts/eval/policy.py --policy act --task barrier \ + --model_path _out/act//checkpoints/last/pretrained_model \ + --num_rollouts 20 --headless --enable_cameras +# 進階:--n_action_steps 25 提高 replan 頻率;--ablate_camera overhead|wrist 量相機貢獻 + +# SM oracle baseline 對照(pick/barrier):--oracle-pose gt|noisy 比較 privileged vs 去 privilege +./isaaclab.sh -p scripts/eval/policy.py --policy oracle --task barrier --oracle-pose gt \ + --num_rollouts 20 --headless --enable_cameras + +# 評估 fine-tuned OpenVLA +./isaaclab.sh -p scripts/eval/policy.py --policy openvla \ + --model_path /path/to/finetuned-openvla --unnorm_key franka_pick \ + --num_rollouts 50 --num_envs 4 --headless --enable_cameras +``` + +### (選用)Barrier / two-box 場景 + +```bash +# Goal 4:把方塊抓起越過屏障放到另一側,收集 barrier 示範 +./isaaclab.sh -p scripts/collect/demos.py --task barrier --num_demos 50 --headless --enable_cameras + +# Goal 1/2:純場景 demo(桌上兩顆方塊,無手臂) +./isaaclab.sh -p scripts/viz/two_box_table.py +``` + +> **Barrier 越障原理**:屏障是 y=0 的一道靜態牆,高度 ≈ 1/3 手臂高(`BARRIER_HEIGHT`)。 +> 越障靠的是把共用 
`PickAndPlaceSm` 的 `lift_height` / `approach_height` 調高 +> (見 `envs/tasks/barrier/collect_env.py` 常數),讓方塊在高處水平橫越屏障,之後才在另一側 +> 垂直下降放置——不需另寫狀態機。屏障變高時,請一併調高這兩個高度以維持越障淨空。 + +### (選用)Goal 3:Push(RL oracle 路徑) + +push 不走 SM,而是先用 PPO 訓一個 push teacher、匯出成 TorchScript 後當 oracle 收 demo。 +box 初始隨機化(DR)由 `PUSH_BOX_DR` 環境變數控制,train / collect / eval 共用同一值: + +```bash +# ① 訓練 PPO push teacher(headless,多 env) +./isaaclab.sh -p scripts/train/push_rl.py --headless --num_envs 4096 +# tensorboard --logdir _out/rl/franka_push + +# ② 匯出最新 checkpoint → policy.pt(jit) +./isaaclab.sh -p scripts/rl/export_push.py --headless + +# (選用)視覺化 / 量成功率 +./isaaclab.sh -p scripts/rl/play_push.py --num_envs 16 + +# ③ 用 RL oracle 收 push demo → HDF5 +./isaaclab.sh -p scripts/collect/push_rl_demos.py --num_demos 50 --num_envs 4 --headless --enable_cameras +``` + +### 已知限制:ACT push 相機依賴(已診斷,修復待跑) + +push 的 ACT 會學成**幾乎只依賴隨手移動的 wrist 第一人稱相機、幾乎不用靜態 overhead**, +closed-loop 一偏離 demo 就 OOD → covariate shift(根因=box init DR 僅 ±3cm 過窄)。 +完整可復現分析見 [`experiments/act_push_failure/`](experiments/act_push_failure/) +(`README.md` / `REPORT_SECTION.md`,純資料集 + checkpoint、不需啟動 Isaac Sim): + +```bash +conda activate lerobot +bash experiments/act_push_failure/run_all.sh +``` + +修復已包成一鍵 pipeline `scripts/fix_push_widen_dr.sh`(把 DR 加大到 ±10–13cm 對齊 barrier, +重訓 PPO→重收→轉檔→重訓 ACT;先 `SPEEDRUN=1` 驗整條通)——**尚未執行**。 + +--- + +## State Machine 狀態說明 + +| State | 說明 | +|-------|------| +| `REST (0)` | 等待初始化(0.2s) | +| `APPROACH_ABOVE_OBJECT (1)` | 移至物件正上方 +0.1m | +| `APPROACH_OBJECT (2)` | 下降至物件位置 | +| `GRASP_OBJECT (3)` | 閉合夾爪(0.3s) | +| `LIFT_OBJECT (4)` | 抬起至 z=0.3m(越障時調高) | +| `MOVE_TO_PLACE (5)` | 移至放置目標正上方 | +| `PLACE (6)` | 下降至放置位置,開夾爪 | +| `LIFT_AFTER_PLACE (7)` | 垂直拉高(避免撞到方塊) | +| `RETREAT (8)` | 退回原點 (0.5, 0, 0.4) | +| `DONE (9)` | 保持位置,等待 env reset | + +--- + +> 完整、最新的英文文件見 [README.md](README.md) 與 [`docs/`](docs/)。 diff --git a/data/convert_to_lerobot.py b/data/convert_to_lerobot.py index 424e493..a0ec4cd 100644 --- 
a/data/convert_to_lerobot.py +++ b/data/convert_to_lerobot.py @@ -245,7 +245,7 @@ def convert( # ── 結束:寫入 per-episode stats,關閉 writer ──────────────────────────── dataset.finalize() # v3.0:取代舊的 consolidate(run_compute_stats=True) - print(f"\n[convert] 完成!") + print("\n[convert] 完成!") print(f" Episodes : {n_episodes}") print(f" Output : {output_dir}") diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..491dc8d --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,238 @@ +# sim2act — System Architecture + +A deep-dive into the `sim2act` simulation data engine for VLA (vision-language-action) imitation learning on NVIDIA Isaac Lab. The engine drives a Franka Panda arm through three manipulation tasks — **pick**, **barrier** (pick over a wall), and **push** (chained two-cube push to a corner) — and turns simulated rollouts into a trained, closed-loop-evaluable ACT policy. + +This document is for foundation-model robotics engineers. It is grounded line-by-line in the source; every claim below points to a concrete class and file. It is intentionally honest about what works and what does not (notably: the push task currently fails closed-loop, and the OpenVLA path is wired but unvalidated). + +--- + +## 1. The data engine, end to end + +The pipeline is a linear sequence of stages, each owning one transformation. Two of the stages run a *privileged oracle* whose only job is to produce expert trajectories; everything downstream is sensor-only and oracle-agnostic. 
+ +``` + sim2act DATA ENGINE + ┌───────────────────────────────────────────────────────────────────────────┐ + │ │ + │ (1) SCENE / ENV CONFIG │ + │ envs/base/franka_base_env.py │ + │ FrankaCubeLiftEnvCfg → FrankaSensorEnvCfg → FrankaBaseEnvCfg │ + │ + per-task scene (barrier wall / 2nd cube) via task modules │ + │ │ Franka + IK-Abs action + overhead RGB-D + wrist RGB │ + │ │ + dual fingertip contact + RecorderManager │ + │ ▼ │ + │ (2) PRIVILEGED ORACLE ── DUAL DESIGN (§4) ────────────────────────── │ + │ ┌─────────────────────────────┐ ┌──────────────────────────────┐ │ + │ │ pick / barrier: │ │ push: │ │ + │ │ Warp GPU state machine │ │ PPO RL teacher (rsl_rl) │ │ + │ │ controllers/ │ │ envs/tasks/push/rl/ │ │ + │ │ pick_place_cube_sm.py │ │ train_env.py + ppo_cfg.py │ │ + │ │ PickAndPlaceSm (10 states)│ │ 4096 parallel envs │ │ + │ └─────────────┬───────────────┘ └───────────────┬──────────────┘ │ + │ │ desired EE pose + gripper (8D) │ (x,y,yaw)→IK-Abs │ + │ ▼ ▼ │ + │ (3) DEMO COLLECTION (success-only; cameras ON) │ + │ scripts/collect/demos.py (SM: pick / barrier) │ + │ scripts/collect/push_rl_demos.py(RL policy replayed w/ sensors) │ + │ │ RecorderManager exports EXPORT_SUCCEEDED_ONLY │ + │ ▼ │ + │ (4) RAW HDF5 _out/datasets/_demos/dataset.hdf5 │ + │ obs/ {joint_pos,vel,torque, object_pos/quat, object2_*, │ + │ rgb_overhead, rgb_wrist, depth_overhead, │ + │ contact_lfinger/rfinger, ee_pos} + actions (T,8 IK-Abs) │ + │ │ │ + │ ▼ │ + │ (5) LeRobot v3.0 CONVERSION │ + │ data/convert_to_lerobot.py (LeRobotDataset.create / add_frame / │ + │ save_episode / finalize) │ + │ │ observation.state (concat), observation.images.{overhead, │ + │ │ wrist, depth} (MP4), action (8D), per-frame task string │ + │ ▼ │ + │ (6) ACT IMITATION LEARNING │ + │ LeRobot ACT, trained via `lerobot-train` (scripts/train/act.py wraps │ + │ it); checkpoint dir = pretrained_model │ + │ │ chunked action prediction │ + │ ▼ │ + │ (7) CLOSED-LOOP EVAL │ + │ scripts/eval/policy.py → 
eval/eval_runner.py (EvalRunner) │ + │ VLAWrapper (eval/vla_wrapper.py): ACTLocal / SMOracle / │ + │ Dummy / OpenVLA ◄── eval/obs_adapter.py builds the obs dict │ + │ success-latch → _out/eval/eval_<timestamp>.json │ + └───────────────────────────────────────────────────────────────────────────┘ +``` + +Two design invariants hold across the whole pipeline: + +- **One action format everywhere.** The recorded action is always an 8D IK-absolute end-effector pose + gripper (`[x,y,z, qw,qx,qy,qz, gripper]`). The push RL teacher internally outputs only `(x, y, yaw)`, but a dedicated recorder re-expands it to the same 8D before it ever touches HDF5 (§4.2). Downstream (LeRobot, ACT, eval) therefore never knows which oracle produced a demo. +- **One source of truth for sensors.** The camera specs live in exactly one place (`FrankaSensorEnvCfg` in `envs/base/franka_base_env.py`) and are inherited by both the collect env and the eval env, so the training and evaluation image distributions cannot silently drift (§2); the fingertip contact sensors are added only on the collection side. + +Two conda environments split the work: + +- **`isaaclab`** runs the simulator-side pipeline: SM/RL collection, PPO training (`scripts/train/push_rl.py`), LeRobot conversion, ACT training (in the fix pipeline), and closed-loop eval. +- **`lerobot`** (LeRobot 0.5.2) runs the no-simulator failure analysis (teacher-forcing L1 and camera ablation on the dataset alone — see `experiments/act_push_failure/`). + +--- + +## 2. Env-config inheritance (`envs/base/`) + +The config chain is deliberately shallow and each layer adds exactly one concern. 
From `envs/base/franka_base_env.py`: + +``` +FrankaCubeLiftEnvCfg (Isaac Lab stock: robot + IK-Abs action + lift task) + │ +FrankaSensorEnvCfg + overhead RGB-D + wrist RGB cameras ← perception layer + ├── FrankaPickVLAEvalEnvCfg + shorter episode; NO recorder ← eval branch + │ +FrankaBaseEnvCfg + dual fingertip contact + RecorderManager ← collection layer + ├── FrankaPickVLAEnvCfg pick-and-place (single cube) + └── FrankaBarrierEnvCfg + barrier wall (envs/tasks/barrier/collect_env.py) + (push collect: FrankaPushRLCollectEnvCfg also extends FrankaBaseEnvCfg) +``` + +**`FrankaSensorEnvCfg`** is the single source of truth for perception. In `__post_init__` it attaches `overhead_camera` (224×224, RGB + `distance_to_image_plane`, mounted at `pos=(0.5, 0.0, 1.5)` looking straight down) and `wrist_camera` (224×224 RGB, mounted on `panda_hand`). It also disables the stock LiftEnv `object_pose` command debug marker — that marker is a real scene prim and would otherwise be photographed by both cameras, contaminating both collected data and eval imagery. Disabling only `debug_vis` keeps the command itself available for obs/reward terms. + +**`FrankaBaseEnvCfg`** is the collection layer. On top of perception it: +- sets `episode_length_s = 20.0` (≈1000 steps, long enough that the SM finishes before any env timeout/reset), +- flips `robot.spawn.activate_contact_sensors = True` (the stock Franka cfg defaults this off, which would make every contact reading zero), +- adds two `ContactSensorCfg` on `panda_leftfinger` / `panda_rightfinger` (net contact force per fingertip), +- installs an `ActionStateRecorderManagerCfg` with `dataset_export_mode = EXPORT_SUCCEEDED_ONLY` (only successful episodes are written), +- and replaces the stock flat-obs recorder with `PreStepStructuredObsRecorderCfg` (the structured multimodal recorder, §3). + +**`FrankaPickVLAEvalEnvCfg`** is the key asymmetry. 
Rather than inherit from `FrankaBaseEnvCfg` and *turn off* the recorder, the eval branch forks directly off `FrankaSensorEnvCfg`. It is therefore born without a recorder or contact sensors — there is nothing to disable — while still sharing the exact camera config used during collection. It sets `episode_length_s = 10.0`, chosen as a safety margin over the harness budget (`max_steps=300 × control_dt=0.02s = 6.0s`); the actual episode end is owned by `EvalRunner`, not by the env timeout. + +### 2.1 How tasks extend the base, and the eval/collect split + +Per-task scene deltas are applied in each task's own module and surfaced through a registry, `envs/tasks/task_presets.py`, which is the single source of truth for *both* collect and eval specs. + +- **pick** (`FrankaPickVLAEnvCfg`) is a no-op extension of the base — it reuses the single `scene.object` cube; the fixed place target is passed to the state machine by the collect script, not baked into the env. +- **barrier** (`envs/tasks/barrier/collect_env.py`) adds a static collision wall and a `+y` cube spawn via `apply_barrier_scene(cfg)`. Crucially, that same function is called by the barrier *eval* env (`FrankaBarrierVLAEvalEnvCfg`, in `envs/tasks/barrier/eval_env.py`), so the wall geometry, spawn point, and init randomization are guaranteed identical between collection and eval. (A prior bug evaluated barrier against the *pick* env — no wall — guaranteeing 0%; the shared-function design exists specifically to prevent that class of drift.) +- **push** (`envs/tasks/push/eval_env.py`, `FrankaPushVLAEvalEnvCfg`) adds the second cube (`object2`) and pins the closed gripper. Its eval env imports `PushEventCfg` directly from the RL training env so the init-randomization radius (`PUSH_BOX_DR`) is shared by train, collect, and eval from one place. + +The registry encodes each task as a `TaskPreset` (collect side) holding an `EvalSpec` (eval side). 
The lazy `make_env_cfg` / `make_eval_env_cfg` factories defer all env imports until after `AppLauncher` has started Omniverse — a hard ordering constraint noted at the top of the file (env cfg modules must not be imported at module load). `place_target` is defined once per task and shared by collect and eval, eliminating the "place target written in two places" footgun. `compute_success(...)` centralizes the per-task collection success test (`sm_done` for pick, geometric `object_to_target` for barrier). + +--- + +## 3. Observation and action spec + +### 3.1 What gets recorded (HDF5 `obs/`) + +`PreStepStructuredObsRecorder` (`envs/base/recorders.py`) returns a structured dict each pre-step, which the RecorderManager expands into `obs/`: + +| Key | Shape | Meaning | +|---|---|---| +| `joint_pos` | (T, 9) | absolute joint angles | +| `joint_vel` | (T, 9) | joint velocities | +| `joint_torque` | (T, 9) | applied torque / effort | +| `object_pos` / `object_quat` | (T, 3) / (T, 4) | primary cube pose, env-local frame, quat wxyz | +| `ee_pos` | (T, 3) | TCP position (env-local), via `ee_frame` | +| `rgb_overhead` | (T, 224, 224, 3) | uint8 | +| `rgb_wrist` | (T, 224, 224, 3) | uint8 | +| `depth_overhead` | (T, 224, 224, 1) | float32 metres | +| `contact_lfinger` / `contact_rfinger` | (T, 3) | net fingertip contact force (world frame, N) | +| `object2_pos` / `object2_quat` | (T, 3) / (T, 4) | second cube — push only, emitted when `object2` exists in the scene | + +Sensor summary: **overhead RGB-D (224×224) + wrist RGB (224×224) + joint pos/vel/torque + dual fingertip contact forces.** IMU was deliberately dropped (design note D4): under a fixed base, hand linear/angular acceleration is recoverable from joint state + FK, so it is redundant. + +### 3.2 The 8D action + +The action is **8-dimensional**: a 7D IK-absolute end-effector pose `[x, y, z, qw, qx, qy, qz]` (env-local) plus 1 gripper command. The downward EE orientation is `wxyz = [0, 1, 0, 0]` (180° about X). 
Gripper convention: `+1.0 = open`, `-1.0 = close`. Control runs at ~50 Hz (`control_dt = sim.dt × decimation = 0.01 × 2 = 0.02 s`). + +For the SM tasks the recorder uses Isaac Lab's stock `PreStepActionsRecorder` (the SM already emits an 8D IK-Abs pose). For push, the RL action is only 3D, so a custom `PreStepPlanarPushActionRecorder` reads the internally-expanded 7D pose (`_pose_command`) from the action term and appends a constant gripper value (`gripper_value`, default `0.0`) to write 8D — keeping the on-disk format identical (§4.2). + +### 3.3 The eval-time observation (`eval/obs_adapter.py`) + +At eval time, `build_obs_dict(env, task_lang)` reconstructs a live obs dict from the scene *after* `env.step()` (PhysX updates sensor buffers at step end). It returns `joint_pos`, `joint_vel`, `ee_pos`, `ee_quat` (from the `ee_frame` FrameTransformer), `object_pos`, `object_quat`, `rgb_overhead`, `rgb_wrist`, and `task_lang`. `_get_rgb` normalizes TiledCamera output to 3-channel (dropping alpha if the camera yields RGBA). Note that ACT in practice consumes only a lean state slice (`joint_pos`, `joint_vel`) plus the two RGB images — see §6. + +--- + +## 4. The dual-oracle design + +The single most important architectural decision is that **the two task families use two different privileged teachers**, chosen by the geometry of the task, not by convenience. + +### 4.1 Pick / barrier → Warp GPU state machine + +`controllers/pick_place_cube_sm.py` implements `PickAndPlaceSm`, a finite state machine compiled as a **Warp GPU kernel** (`infer_state_machine`) that runs across all parallel envs in lockstep. 
It has **10 states** (`PickSmState`): + +``` +0 REST → 1 APPROACH_ABOVE_OBJECT → 2 APPROACH_OBJECT → 3 GRASP_OBJECT +→ 4 LIFT_OBJECT → 5 MOVE_TO_PLACE → 6 PLACE → 7 LIFT_AFTER_PLACE +→ 8 RETREAT → 9 DONE +``` + +Each state emits a desired EE transform + a gripper command (`GripperState.OPEN/CLOSE`) and advances when the EE is within `position_threshold` of its target *and* a per-state dwell time (`PickSmWaitTime`) has elapsed. `compute(ee_pose, object_pos, place_pos)` does the wxyz↔xyzw conversions Warp's `wp.transform` expects, launches the kernel, and returns an 8D action (`des_ee_pose` + gripper). `is_done()` reports `sm_state == 9`, which the collect loop uses to latch success. + +The state machine is parameterized, not forked: **barrier reuses the very same `PickAndPlaceSm`**, only with raised waypoints. `envs/tasks/barrier/collect_env.py` passes `lift_height=0.50`, `approach_height=0.32`, and `retreat_pos=(0.50, -0.22, 0.45)` so the cube is carried *high over* the wall (wall height `BARRIER_HEIGHT = 0.30 m`, with its top at `z ≈ 0.33` env-local) and only descends vertically on the far side; the retreat point sits high on the placing side to avoid re-crossing the wall. + +**Why a scripted oracle here:** pick and barrier are *kinematic* tasks — grasp a rigid body, move it along a collision-free arc, release. The solution is a small fixed sequence of EE waypoints with known geometry. A state machine expresses that exactly, runs at GPU scale with zero training, and is perfectly reproducible. The barrier results bear this out: the resulting ACT student reaches **90% in-distribution success vs the SM oracle's 75%** — i.e. the imitation student *beats its own privileged teacher* (the SM occasionally clips the wall; ACT learned a smoother, more reliable cross). OOD at `init_scale 1.5` drops to **55%**, which is the honest generalization ceiling of the in-distribution demo set. 
+ +### 4.2 Push → PPO RL teacher + +Push is handled by an RL teacher under `envs/tasks/push/rl/`, trained with **rsl_rl PPO across 4096 parallel envs**. + +- **Env** (`train_env.py`, `FrankaPushRLEnvCfg`): swaps the arm action to `PlanarPushAction`, removes the gripper action and pins fingers closed (pure pusher), adds the second cube `object2`, and installs push-specific obs/reward/termination/event managers. Observations are *state-based and camera-free* (`PushObsCfg`: joint pos/vel, privileged box1/box2/EE positions, the constant corner target, last action) so 4096 envs run fast. `num_envs = 4096`, `episode_length_s = 8.0`. +- **Action** (`action.py`, `PlanarPushAction`): the policy outputs only **3D `(x, y, yaw)`**. The term subclasses Isaac Lab's `DifferentialInverseKinematicsAction` and, in `process_actions`, expands those 3 numbers into a full 7D pose — relative `dx, dy` from the *current* EE pose, `z` locked at `push_height`, orientation fixed downward and rotated by an absolute `yaw` about world-Z — then hands it to the proven differential-IK controller. The minimal action space speeds PPO convergence; computing from the live EE pose every step makes it closed-loop by construction. +- **Reward** (`mdp.py` + `PushRewardsCfg`): a chained-push shaping stack. Success is *not* "box2 reached the corner" alone — that admits a shortcut (gripper directly shoving box2). It is "box2 in corner **and** box1 still adjacent to box2" (`_success_mask`), plus a penalty on the EE approaching box2 (term `ee_away_from_box2`, which wraps `ee_near_box2` with a negative weight) to forbid bypassing box1. Progress terms are potential-based (velocity toward the corner, no idle baseline). +- **PPO** (`ppo_cfg.py`, `PushPPORunnerCfg`): actor/critic `[256,128,64]` ELU, `entropy_coef=0.006`, `gamma=0.98`, adaptive-KL LR schedule, `max_iterations=3000`. The original report records **~98.5% training success** for this teacher. + +**Why RL here:** push is *contact-rich and non-prehensile*. 
There is no grasp; the cube is moved by frictional contact, the contact point drifts, box1 must stay aligned on the box1→corner line while transmitting force to box2. There is no simple closed-form waypoint sequence — the policy must learn a feedback controller over contact dynamics. PPO with dense shaping is the right tool; a scripted SM is not. + +**The bridge that keeps the pipeline uniform** (`envs/tasks/push/rl/collect_env.py`, `FrankaPushRLCollectEnvCfg`): collection re-hosts the trained policy on top of `FrankaBaseEnvCfg` (cameras + recorder + contact), reusing the *identical* `PlanarPushAction` and `PushObsCfg` so the exported policy runs natively — zero obs rebuild, zero action recompute. It then swaps in `PreStepPlanarPushActionRecorderCfg` (assigned to `recorders.record_pre_step_actions`), which records the 8D IK-Abs pose the IK layer actually commanded. Result: push demos land on disk in the same 8D format as pick/barrier, and ACT training needs no push-specific code. + +### 4.3 Why push currently fails (and the held fix) + +Despite the ~98.5% RL teacher and a clean dataset, the **push ACT student scores 0% closed-loop (pre-fix)**. This is a genuine, diagnosed failure, not a bug: + +- The model *did* learn the demos: **teacher-forcing EE-xy L1 = 0.011 m**. +- But it learned a *shortcut*. Camera ablation on the dataset shows the push policy is far more sensitive to the wrist camera than the overhead: **black-wrist Δ = 0.197 vs black-overhead Δ = 0.038**. (Contrast barrier, which relies robustly on the overhead: **black-overhead Δ = 0.089 vs black-wrist Δ = 0.027**.) +- **Root cause:** the push init randomization is only **±3 cm** (`PUSH_BOX_DR = 0.03`), versus barrier's ±13/±7 cm. With cubes nearly fixed, the static overhead camera is uninformative, so the policy leans on the *moving* wrist camera — a feature that is reliable under teacher forcing but spirals under closed-loop covariate shift. 
+- **Fix:** widen the push init DR to ±10–13 cm via the `PUSH_BOX_DR` env var, which `train_env.py` threads through one shared `PushEventCfg` to train/collect/eval, then re-train end to end (`scripts/fix_push_widen_dr.sh`: PPO → export → recollect → convert → ACT). + +The after-fix closed-loop number is **_(pending fix run)_** — the retrain is held and no validated post-fix result exists yet. + +> **Extension point — OpenVLA / Octo / π0:** `OpenVLAWrapper` (§6) is wired into the eval harness and converts a 7D VLA output to the 8D IK-Abs format, but it is **unvalidated** — no run has confirmed it produces useful actions. Treat it as scaffolding, not a working baseline. + +--- + +## 5. LeRobot v3.0 conversion (`data/convert_to_lerobot.py`) + +`convert(...)` turns the RecorderManager HDF5 into a LeRobot v3.0 dataset using the LeRobot API directly (`LeRobotDataset.create` → `add_frame` → `save_episode` → `finalize`). + +- **State assembly.** `observation.state` is the concatenation, in fixed order, of whatever `OBS_STATE_KEYS` are present: `joint_pos, joint_vel, joint_torque, object_pos, object_quat, object2_pos, object2_quat, contact_lfinger, contact_rfinger`. `--state_keys` can narrow this (e.g. `joint_pos,joint_vel`) to match the lean eval obs; the per-dimension `names` are generated automatically. +- **Images → video.** `rgb_overhead` → `observation.images.overhead`, `rgb_wrist` → `observation.images.wrist`, each declared as `dtype="video"` (224×224×3) and encoded to MP4. +- **Depth.** `depth_overhead` (float32 m) is clamped to `DEPTH_MAX_RANGE=2.0 m`, normalized to a 3-channel uint8, and stored as `observation.images.depth` video. The lossless float remains in the HDF5; `--no_depth` skips it (used in the push lean config). +- **Action.** Written verbatim as the 8D `action` feature, with `names = panda_joint_0..6 + gripper`. +- **Task string.** v3.0 requires a per-frame `task`; it is attached on every `add_frame` and `save_episode` carries it. 
+- **fps.** Defaults to the HDF5 `fps` attr (collect writes the true control rate = 50); legacy datasets without the attr fall back to 30 with a warning. +- **Safety.** `success_only=True` by default skips any non-success demo (defense against mixed sets, even though collection is already `EXPORT_SUCCEEDED_ONLY`); an existing output dir is removed and rebuilt. + +Output layout (`meta/`, `data/chunk-000/`, `videos/chunk-000/observation.images.*`) is generated by the API. + +--- + +## 6. The eval harness + +Closed-loop evaluation is a clean separation between *what policy is driving* and *how an episode is scored*. + +### 6.1 `VLAWrapper` abstraction (`eval/vla_wrapper.py`) + +An abstract base with one required method, `predict_action(obs) -> (N, 8)` IK-Abs, and an optional `reset(env_ids)` for recurrent/queued state. Four implementations: + +- **`DummyVLAWrapper`** — random EE pose biased into the reachable workspace, fixed downward orientation, random gripper. Purpose: verify the harness end-to-end; expected ~0% success. +- **`ACTLocalWrapper`** — loads a LeRobot `ACTPolicy` **in-process** (no socket server; the server fallback was removed). It feeds the lean state (`joint_pos`, `joint_vel`) plus overhead+wrist RGB (permuted to NCHW, `/255`) and returns the post-processed 8D action with the quaternion re-normalized. It supports two operationally important knobs: + - `n_action_steps` — an inference-time replan-frequency override. The default equals `chunk_size=100` (≈open-loop); lowering it re-plans more often to fight covariate shift. (Presets set this: barrier `50`, push `50`.) + - `ablate_camera` — zero out the overhead or wrist input at inference, the closed-loop counterpart of the dataset ablation that exposed the push shortcut. + It also patches a missing top-level `"type":"act"` in some checkpoints' `config.json` (a known cross-version LeRobot loader quirk). 
+- **`SMOracleWrapper`** — wraps the *same* `PickAndPlaceSm` (via `preset.make_sm`) as a de-privileged-oracle baseline. `oracle_pose="gt"` feeds ground-truth object pose; `oracle_pose="noisy"` injects a persistent per-episode pose offset (`PoseNoiseCfg`) to emulate perception error. Output is the SM's native 8D, so the runner needs zero changes. (Not available for push — push has no SM.) +- **`OpenVLAWrapper`** — fine-tuned OpenVLA via HF `AutoModelForVision2Seq`; converts its 7D output `[x,y,z,qx,qy,qz,gripper]` to 8D by recovering `qw` from the unit-quaternion constraint and binarizing the gripper. **Wired but unvalidated** — the documented extension point for OpenVLA / Octo / π0. + +### 6.2 `EvalRunner` success-latch (`eval/eval_runner.py`) + +The runner steps a (possibly multi-env) Isaac Lab env, queries the policy, and scores with a **geometric success latch** rather than a terminal check — because `GymnasiumEnv` auto-resets the instant an env terminates, so reading object position *after* a terminal step would read the already-reset state. Instead `success_latch[i]` is set true if the target ever comes within `success_threshold` of the place target during the episode. + +The success predicate is per-task, driven by the `EvalSpec`: +- pick/barrier: target = `object`, within threshold **and** gripper open (released). +- push: target = `object2`, within threshold, **and** box1 (`chained_object`) is within `chained_gap` (default 0.12 m) of box2 — computed by `_chained_gap()` on the xy positions — with `require_gripper_open=False` (pusher stays closed). + +An episode ends on any of three triggers: env-terminated/truncated (`dones`), our `max_steps` timeout (manual `_reset_idx`), or a success early-stop after `success_hold_steps` consecutive in-target steps (makes `steps` meaningful as steps-to-success). Each episode's cube init position is logged so OOD success can be binned by init geometry. 
`_finalize` averages steps over *successful* episodes only and writes `_out/eval/eval_.json`. + +Note the deliberate asymmetry in episode budgets: `EvalSpec.max_steps` defaults to 300 but barrier overrides to 400 (its demos run ~290 steps, so 300 would risk a spurious timeout/0%). This is the same source-of-truth registry (`envs/tasks/task_presets.py`) that defines the collection specs — collect and eval read their per-task knobs from one place. diff --git a/Project-Corvinus-Report.pdf b/docs/archive/Project-Corvinus-Report.pdf similarity index 100% rename from Project-Corvinus-Report.pdf rename to docs/archive/Project-Corvinus-Report.pdf diff --git a/R&S Challenge.md b/docs/archive/R&S Challenge.md similarity index 100% rename from R&S Challenge.md rename to docs/archive/R&S Challenge.md diff --git a/docs/archive/README.md b/docs/archive/README.md new file mode 100644 index 0000000..ec88aa5 --- /dev/null +++ b/docs/archive/README.md @@ -0,0 +1,28 @@ +# Archive — original challenge brief & submission + +This project began as a take-home challenge for a **Robotics & Sim (R&S)** team building +**simulation-to-real pipelines for VLA models** in laboratory robotics. The brief's thesis — +*"the challenge is not the quantity of the data, but the quality"* — +in notoriously messy lab environments is exactly the motivation behind the public **sim2act** +data engine. + +These files are kept here for **provenance only**. The main project (repo-root +[`README.md`](../../README.md)) is the generalized, public version and does not depend on anything +in this folder. + +- [`R&S Challenge.md`](R&S%20Challenge.md) — the original challenge brief (Goals 1–5). +- [`Project-Corvinus-Report.pdf`](Project-Corvinus-Report.pdf) — the original written submission. + ("Corvinus" was the submission codename, now retired in favor of **sim2act**.) 
+ +## How the original goals map to the repo + +| Goal | Brief | Where in sim2act | +|---|---|---| +| 1 | A box on a table | `envs/scenes/two_box_table_env.py` | +| 2 | A second box | same scene (two boxes) | +| 3 | Push box-1 into box-2 to move it to a table corner | `envs/tasks/push/` — PPO RL oracle | +| 4 | Pick a block over a ⅓-arm-height barrier to the other side | `envs/tasks/barrier/` — Warp state machine | +| 5 | Collect realistically usable sensor data (RGB, tactile, joint-state, …) | `envs/base/` — overhead RGB-D + wrist RGB + joint pos/vel/torque + fingertip contact forces | + +> Per the brief, generative-AI use was permitted provided the prompts were disclosed; that +> disclosure accompanied the original submission. diff --git a/docs/case-study-push.md b/docs/case-study-push.md new file mode 100644 index 0000000..72d0d33 --- /dev/null +++ b/docs/case-study-push.md @@ -0,0 +1,146 @@ +# Case Study: Why the ACT Push Policy Scored 0% — A Camera-Reliance Shortcut + +A post-mortem of a hard, total failure in the sim2act data engine: an ACT (Action Chunking Transformer) policy trained on RL-expert demonstrations that **never touches the box it is supposed to push**, and drives straight for the corner instead. The same perception stack and training pipeline succeed on the `barrier` task, which gives us a clean control group and turns "the model is bad" into a specific, falsifiable root cause. + +Every number below is reproducible from `experiments/act_push_failure/` (no simulator required). Sources: + +- `experiments/act_push_failure/act_camera_ablation.py` — the diagnostic (E1 teacher-forcing + E2 per-camera ablation). +- `experiments/act_push_failure/results/push_summary.json`, `experiments/act_push_failure/results/barrier_summary.json` — the evidence. +- `experiments/act_push_failure/README.md`, `experiments/act_push_failure/REPORT_SECTION.md` — the original (Chinese) analysis this English case study is promoted from. + +--- + +## 1. 
Problem + +**Task.** Push `box1` into `box2`, then push `box2` into the table corner. The teacher is a PPO policy (privileged state observations, 4096 parallel envs, ~98.5% training success per the original report); we collect successful rollouts as demonstrations and train ACT on the RGB + proprioception observation that a real robot would have. + +**Symptom.** The trained ACT push policy has **0% closed-loop success (pre-fix)**. It does not approach `box1` at all. From the very first step the end-effector drives diagonally forward — past `box2` — toward the corner of the table, as if the boxes were not there. + +**Why this is alarming.** The exact same sensor suite (overhead RGB-D 224×224 + wrist RGB 224×224 + joint pos/vel/torque + dual fingertip contact forces; 8D action = 7D IK-absolute EE pose + 1 gripper; ~50 Hz control) and the same `collect → LeRobot convert → ACT train → eval` pipeline produce a **working** `barrier` policy that reaches **90% in-distribution success and actually beats its 75% SM oracle teacher** (OOD at `init_scale` 1.5 drops to 55%). So the failure is not in ACT, the wrapper, or the data plumbing in general — it is specific to the push setup. The job of this case study is to localize it. + +--- + +## 2. Root Cause (one paragraph) + +The push policy **learned to rely almost entirely on the moving, egocentric wrist camera, and to essentially ignore the static overhead camera.** The wrist view is tightly correlated with the action (the camera rides the end-effector), so it is trivially easy to fit in-distribution and yields a low training loss — but it is fragile. In closed loop, the moment the executed trajectory deviates slightly from the demonstrations, the wrist camera sees a viewpoint that never appeared in training (out-of-distribution); the action error grows, which moves the wrist view further off-distribution, which grows the error again — a textbook **covariate-shift spiral**. 
The policy collapses back to a vision-independent prior: "drive to the corner." + +The upstream cause is the **data distribution, not the model.** Push initialized the boxes with only **±3 cm** of domain randomization (`PUSH_BOX_DR`, default `0.03`, in `envs/tasks/push/rl/train_env.py`). Across every demo the static overhead camera therefore sees almost the same picture, so it carries no discriminative signal about *where* to push. The policy takes the path of least resistance and latches onto the one input that *does* vary with the action — the wrist camera — as a shortcut. Barrier, by contrast, randomizes by **±13 / ±7 cm**, which forces the policy onto the viewpoint-robust overhead camera, and so it generalizes. + +--- + +## 3. Evidence + +The analysis runs as two dataset-only experiments (no Isaac Sim), with `barrier` as the control group. Both are produced by a single script, `act_camera_ablation.py`, which loads one demo episode and feeds each recorded frame through **the exact same `ACTLocalWrapper`** used at eval time (same `ACTPolicy`, same pre/post-processors). + +### E1 — Teacher-forcing replay (rules out under-training / normalizer / mean-collapse) + +Feed the model the recorded ground-truth observation frame by frame and compare its predicted action against the recorded action. If this L1 is small, the inference path is correct and **the model genuinely learned the demonstrations**. + +| Metric (push, episode 0, 40 frames) | Value | +|---|---| +| Mean L1, all 8 action dims | 0.0087 | +| **Mean L1, EE-xy** | **0.011 m** | + +An end-effector-xy reproduction error of **0.011 m** on teacher-forced demo frames is tight. This eliminates the cheap explanations: it is **not** a LeRobot/normalizer version mismatch, **not** under-training, and **not** mean-collapse (the model is not emitting a constant). 
The model learned the mapping; the failure must be about *which input it learned to trust* and what happens when that input goes off-distribution in closed loop. + +### E2 — Per-camera ablation (locates the shortcut) + +For the same frames, black out one camera at a time and measure how far the predicted EE-xy target moves (`|Δ|`, meters). A large shift = the policy depends heavily on that camera. + +| Task | Black overhead Δ | Black wrist Δ | Primary reliance | Closed-loop eval | +|---|:---:|:---:|---|:---:| +| **push** | 0.038 | **0.197** | **wrist (fragile)** | Fail (0%) | +| **barrier** | **0.089** | 0.027 | **overhead (robust)** | Success (90%) | + +The push numbers are decisive: blacking the **wrist** moves the target by **0.197 m** while blacking the **overhead** barely moves it (**0.038 m**) — a ~5× asymmetry. The push policy is heavily wrist-dependent. (Note this is the *opposite* of causal confusion / ignoring the cameras — the policy over-relies on vision, just on the *wrong* camera.) + +**The blacked-wrist target reproduces the failure symptom exactly.** With the wrist camera removed, the push policy's mean EE-xy target collapses to **[0.64, 0.25]** — past `box2`, out toward the corner — the very behavior observed in closed-loop rollouts (`black_wrist_mean_xy` in `push_summary.json`). This closes the causal loop: *wrist view fails ⇒ the exact behavior we see*. In closed loop the wrist view does not need to be blacked out; covariate shift degrades it continuously, with the same effect. + +### The barrier control group (why barrier succeeds and push fails) + +Barrier is the same code path with a working outcome, and its ablation is **mirror-imaged**: it depends on the **overhead** camera (Δ = 0.089) and is nearly indifferent to the wrist (Δ = 0.027). The overhead camera is static and sees the whole scene, so its view is stable under trajectory deviations — there is no covariate-shift spiral to fall into. 
This is exactly why barrier generalizes and push does not, and it confirms the difference is the *learned camera reliance*, not anything else in the stack. + +### Why the same camera config yields opposite reliance + +The two tasks share an identical camera rig; only the data distribution and task geometry differ: + +| Factor | push | barrier | +|---|---|---| +| Box init DR | **±3 cm** (`PUSH_BOX_DR`, `envs/tasks/push/rl/train_env.py`) | **±13 / ±7 cm** (`envs/tasks/barrier/collect_env.py`) | +| Demo length × count | ~40 frames × 50 | ~287 frames | +| Task nature | low, in-contact pushing (wrist sits near the object, highly action-correlated) | pick-and-place (overhead sees the global layout) | + +Narrow DR ⇒ the static overhead view is nearly constant across all demos ⇒ no discriminative signal ⇒ the policy grabs the action-correlated wrist camera as a shortcut. (A supporting E3 check dumps the training images and confirms they are not blank — push wrist frames have mean intensity ≈115 — so this is a learned-reliance problem, not corrupt data.) + +**Conclusion.** The failure is not model capacity, data quality, or the inference implementation. It is a **learned dependence on a fragile egocentric camera, caused upstream by too-narrow initial-state randomization.** + +--- + +## 4. The Fix + +Widen push initial-state randomization to match the barrier scale, regenerate demos with the RL expert (cheap, fully automatic), and retrain ACT. DR is controlled by the `PUSH_BOX_DR` environment variable, which is read by a single `PushEventCfg` shared across train / collect / eval, so one setting stays consistent end-to-end. 
+ +```bash +conda activate isaaclab +SPEEDRUN=1 ./scripts/fix_push_widen_dr.sh # verify the whole pipeline first (small run) +./scripts/fix_push_widen_dr.sh # full: PUSH_BOX_DR ±10–13 cm, PPO → 50 demos → ACT +``` + +The script (`scripts/fix_push_widen_dr.sh`) runs the full pipeline with the box init DR widened to the barrier scale (**±10–13 cm**, vs barrier's ±13/±7 cm). It exports the radius through `PUSH_BOX_DR`; the script's `BOX_DR` default is `0.10` (±10 cm) and is overridable (e.g. `BOX_DR=0.12`). The steps: retrain PPO (4096 envs) → export TorchScript policy → re-collect demos with cameras on → convert to LeRobot → retrain ACT. + +**Mechanism.** Wider DR makes the overhead camera vary meaningfully across demos, restoring its discriminative signal, while also broadening wrist-view coverage so off-trajectory viewpoints are less out-of-distribution. Both pressures push the policy off the wrist shortcut. + +**Expected primary signal (the falsifiable prediction).** Re-run the *same* `act_camera_ablation.py` on the retrained checkpoint. We expect the reliance to **flip toward overhead** — i.e. the **black-overhead Δ rises** and the **black-wrist Δ falls** — converging toward the barrier-like profile (overhead Δ > wrist Δ). Honest read-out criteria, defined in advance: + +- **Success:** black-overhead Δ becomes clearly larger than black-wrist Δ **and** closed-loop push success rises well above 0%. +- **Partial:** the camera-reliance asymmetry shrinks or flips, but closed-loop success is still low → DR helped but is not sufficient on its own; escalate to the secondary mitigations below. +- **No change:** ablation profile and 0% success persist → the wrist shortcut is not driven by DR alone; revisit the hypothesis rather than tuning blindly. + +**Secondary mitigations, by ROI** (if the DR fix is only partial): + +1. Retrain an **overhead-only** variant (drop the wrist camera). If that alone works, it 100% confirms the wrist-dependence diagnosis. +2. 
Collect more and longer demos; add wrist-image augmentation (crop / color jitter). +3. **DAgger** or inject action noise during collection to cover off-trajectory wrist viewpoints — directly attacking the covariate shift. + +--- + +## 5. Before → After + +The retrain is intentionally held, so the after-fix column is a placeholder. **No after-fix numbers are fabricated.** + +| Signal | Before (±3 cm DR) | After (±10–13 cm DR) | +|---|:---:|:---:| +| Black-overhead Δ (m) | 0.038 | _(pending fix run)_ | +| Black-wrist Δ (m) | 0.197 | _(pending fix run)_ | +| Primary camera reliance | wrist (fragile) | _(pending fix run)_ | +| Teacher-forcing EE-xy L1 (m) | 0.011 | _(pending fix run)_ | +| Closed-loop push success | 0% | _(pending fix run)_ | + +Target shape of the "after" column: overhead Δ > wrist Δ (barrier-like), with closed-loop success well above 0%. + +--- + +## 6. Reproduce (~5 min, no simulator) + +The entire diagnostic runs off the saved dataset + checkpoint — no Isaac Sim needed. It uses the `lerobot` conda environment (lerobot 0.5.2), the same one used for the no-simulator failure analysis. + +```bash +conda activate lerobot +bash experiments/act_push_failure/run_all.sh +``` + +Outputs land in `experiments/act_push_failure/results/`: + +- `push_summary.json` / `push_replay.log` — E1 + E2 for push. +- `barrier_summary.json` / `barrier_replay.log` — the control group. +- `images/push_{wrist,overhead}_*.png` — dumped training frames (E3). + +To validate a retrained checkpoint, point the same script at the new dataset/model (see the eval and ablation commands printed at the end of `scripts/fix_push_widen_dr.sh`) and check whether the camera-reliance asymmetry has flipped. + +--- + +## Appendix — Notes and caveats + +- **Numbers** are quoted verbatim from `push_summary.json` / `barrier_summary.json`. The barrier black-overhead Δ is `0.089` (the raw JSON stores `0.08947…`; the original Chinese README/REPORT_SECTION round it to `0.090`). 
+- **OpenVLA / Octo / π0** are an extension point in this repo: `OpenVLAWrapper` is wired but **unvalidated** — this case study concerns ACT only and makes no claim about those backbones. +- **The "after" column is unverified.** Treat the fix as a hypothesis with a pre-registered, falsifiable success criterion, not a settled result. diff --git a/docs/images/act_barrier_success.gif b/docs/images/act_barrier_success.gif new file mode 100644 index 0000000..f1888d1 Binary files /dev/null and b/docs/images/act_barrier_success.gif differ diff --git a/docs/images/act_push_fail.gif b/docs/images/act_push_fail.gif new file mode 100644 index 0000000..397f81e Binary files /dev/null and b/docs/images/act_push_fail.gif differ diff --git a/docs/images/act_push_fail_no_wrist.gif b/docs/images/act_push_fail_no_wrist.gif new file mode 100644 index 0000000..a6eb22b Binary files /dev/null and b/docs/images/act_push_fail_no_wrist.gif differ diff --git a/docs/images/rl_push_suc_16.gif b/docs/images/rl_push_suc_16.gif new file mode 100644 index 0000000..7cb1f72 Binary files /dev/null and b/docs/images/rl_push_suc_16.gif differ diff --git a/docs/images/sm_pick_9.gif b/docs/images/sm_pick_9.gif new file mode 100644 index 0000000..67d0dbc Binary files /dev/null and b/docs/images/sm_pick_9.gif differ diff --git a/docs/results.md b/docs/results.md new file mode 100644 index 0000000..88b649c --- /dev/null +++ b/docs/results.md @@ -0,0 +1,128 @@ +# Results + +Consolidated evaluation results for the **sim2act** simulation data engine on NVIDIA Isaac Lab. Three Franka manipulation tasks — `pick`, `barrier` (pick-over-barrier, prehensile), and `push` (box-into-box-to-corner, non-prehensile) — built on a single pipeline: a **privileged oracle** collects high-quality demonstrations, which an **ACT** (Action Chunking Transformer) policy distils into a camera-only closed-loop controller. 
The oracle may read ground-truth object pose; the distilled policy sees only the two camera images plus proprioception. That asymmetry is deliberate: the quantity being measured is the visual and spatial robustness that the policy must learn but the oracle never needed. + +> **Headline:** On `barrier`, the distilled student **beats its privileged teacher** — ACT reaches **90%** in-distribution success vs. the state-machine oracle's **75%**. The policy is not merely copying the demonstrations; inside the trained regime it is *more* robust than the controller that generated them. The same pipeline applied to `push` currently scores **0%** (pre-fix). That contrast is what makes this useful as a diagnostic: holding the pipeline fixed and varying only the task isolates *why* distillation succeeds or fails (see [Camera ablation](#3-camera-ablation--what-the-policy-relies-on) and [Root cause](#root-cause-of-the-push-failure)). + +All numbers below are measured. The `push` after-fix retrain is held and **not yet run**; rows that would contain after-fix numbers are marked `_(pending fix run)_` and contain no fabricated values. 
+ +--- + +## Setup (common to all tasks) + +| Component | Value | +|---|---| +| Simulator | NVIDIA Isaac Lab, Franka Panda arm | +| Sensors | overhead RGB-D (224×224) + wrist RGB (224×224) + joint pos/vel/torque + dual fingertip contact forces | +| Action space | 8-D: 7-D IK-absolute end-effector pose (`x, y, z, qw, qx, qy, qz`) + 1 gripper | +| Control rate | ~50 Hz | +| Distilled policy | ACT (Action Chunking Transformer); inputs = overhead + wrist RGB + proprioception only (no privileged pose) | +| `barrier` teacher | hand-written Warp state machine (sufficient where physics is stable) | +| `push` teacher | PPO RL policy (`rsl_rl`), 4096 parallel envs; the push is too physically unstable for an open-loop state machine | + +Two conda environments are used: **`isaaclab`** (sim: demo collection, PPO RL, LeRobot conversion, in-sim eval, and ACT training in the fix pipeline) and **`lerobot`** (lerobot 0.5.2; runs the no-simulator failure analysis — teacher-forcing replay and camera ablation, which need only the dataset + checkpoint, no Isaac Sim). + +> **Extension point — large VLA backbones (UNVALIDATED).** `OpenVLAWrapper` is wired into the eval harness (`eval/vla_wrapper.py`, selectable via `scripts/eval/policy.py --policy openvla`) as the integration seam for OpenVLA / Octo / π₀-class models. It has **not** been validated end-to-end on these tasks — no success numbers exist for it and none are claimed here. It is a forward hook, not a result. + +--- + +## 1. Per-task success + +Success rate of the distilled ACT policy vs. its privileged oracle, in-distribution (training initialization range), unless noted. + +| Task | Distilled ACT | Privileged oracle | Notes | +|---|:---:|:---:|---| +| **barrier** (pick-over-barrier) | **90%** | 75% (SM oracle) | **Student beats teacher** in-distribution. Replan = 50 actions/chunk (tuned; see below). 
| +| **push** (box→box→corner), pre-fix | **0%** | ~98.5% PPO teacher (training success) | Policy skips box-1 and drives straight past box-2 toward the table corner — diagnosed covariate-shift failure (see root cause). | +| **push**, after fix | `_(pending fix run)_` | ~98.5% PPO teacher | Fix = widen `push` init DR; retrain held. No after-fix number is available. | +| **pick** | demonstrations collected | shared pick-and-place SM | Demonstration set collected; full ACT success eval not reported here. | + +Notes: +- **barrier 90% > 75% is the central finding.** The 75% oracle is a geometric state machine reading ground-truth pose; the 90% ACT sees only cameras. Inside the demonstrated range the end-to-end policy is more robust than the modular pose+planner controller it was distilled from. +- The **push 0%** is *not* a missing experiment — it is a reproduced, diagnosed failure (the model demonstrably learned the demos; see [§4](#4-teacher-forcing--the-model-learned-the-demos)). The PPO teacher itself solves the task (~98.5% training success across 4096 parallel envs, per the original report), so the data and the oracle are sound; the failure is in what the *vision* policy latched onto. +- **barrier replan frequency** is an inference-only lever (no retraining): executing 50 actions per replan (vs. the 100-step default chunk, effectively open-loop over a ~287-step episode) is what raises in-distribution success to the 90% operating point. + +--- + +## 2. Barrier — OOD spatial generalization + +Because the policy learns the task from images, it should **interpolate** inside the demonstrated initialization range but is not expected to **extrapolate** beyond it. Tested at two initialization scales — the training range (`×1.0`) and that range expanded by 50% to add an unseen outer ring (`×1.5`) — 20 rollouts each, cube-at-rest-within-8 cm success criterion. 
+ +| Initialization scale | Coverage | Policy (ACT) | SM oracle (GT pose) | +|---|---|:---:|:---:| +| **×1.0** | inside training range | **90%** | 75% | +| **×1.5** | training range + unseen outer ring | **55%** | 75% | + +Interpretation: +- **The ranking inverts out of distribution.** The ACT policy drops from 90% → 55% while the oracle, a geometric controller reading ground-truth pose, holds a flat 75% because it extrapolates by construction. +- The drop is a **generalization gap, not an impossible task**: the oracle solves those same outer-ring starts (stable 75% across both scales), so the failures are spatially localized to the unseen ring, and the success/failure boundary tracks the *training box* rather than task difficulty. This is exactly what image-based interpolation predicts. +- **Fairness caveat.** The 75% baseline here reads *ground-truth* pose, which is immune to the lighting/clutter/pose error a camera-only policy faces. The intended apples-to-apples baseline is a noisy-pose oracle (the same state machine fed an *estimated* pose), so both pipelines see realistic perception; per the original report that comparison is still pending and is not the source of these numbers. +- Region/initialization analysis and the top-down success/failure map are produced by `tools/viz/eval_ood.py` (training range encoded as `x ∈ [0.37, 0.63]`, `y ∈ [0.18, 0.32]`; pure numpy + matplotlib, runnable without Isaac Lab). The next lever this points to is **wider, uniformly covered data and a pretrained spatial prior**, not more training steps. + +--- + +## 3. Camera ablation — what the policy relies on + +Per-camera input ablation run **without** the simulator: for real demonstration frames, blank one camera and measure how much the policy's predicted action changes vs. full observation, `Δ = mean_t ‖a_full(t) − a_blank-cam(t)‖`. A large Δ means the policy depends heavily on that camera. 
Source: `tools/viz/push_camera_sensitivity.py` (and `experiments/act_push_failure/act_camera_ablation.py`); numbers from `experiments/act_push_failure/results/{push,barrier}_summary.json`. + +| Task | Δ blank overhead | Δ blank wrist | Primary reliance | Eval outcome | +|---|:---:|:---:|---|:---:| +| **push** | 0.038 | **0.197** | wrist (moving / fragile) | fail (0%) | +| **barrier** | **0.089** | 0.027 | overhead (static / robust) | success (90%) | + +One-line interpretation: **the successful policy leans on the static overhead camera; the failing one leans on the moving wrist camera.** The `push` policy's heavy wrist reliance (Δ 0.197 vs. 0.038) is a *shortcut* — the wrist view is tightly coupled to the action, so it fits easily in-distribution but goes out-of-distribution the instant the closed-loop trajectory drifts. Direct corroboration: when the wrist image is blanked, the push policy's predicted EE target collapses to `[0.64, 0.25]` (past box-2, toward the corner) — i.e. blanking the wrist reproduces the exact observed failure behavior. `barrier`, by contrast, relies on the viewpoint-stable overhead camera (Δ 0.089 vs. 0.027), which is why the same pipeline generalizes there. + +--- + +## 4. Teacher-forcing — the model learned the demos + +To rule out a broken inference path, under-training, or mean-collapse, the model is fed the demonstration's ground-truth observations frame-by-frame and its predicted action is compared to the recorded action. + +| Task | Teacher-forcing EE-xy L1 | +|---|:---:| +| **push** | **0.011 m** | +| barrier | 0.006 m | + +The push EE-xy L1 of **0.011 m** proves the ACT model *did* learn the demonstrations and the inference pipeline is correct. Combined with §3, this is decisive: the push failure is **not** model capacity, data quality, normalization mismatch, under-training, or mean-collapse — it is a closed-loop covariate-shift failure driven by reliance on the fragile egocentric (wrist) camera. 
+ +--- + +## Root cause of the push failure + +The diagnosis (`experiments/act_push_failure/`) chains the evidence above into a single mechanism: + +1. **Init DR is too narrow.** The `push` box initialization is randomized by only **±3 cm**, vs. **±13 / ±7 cm** for `barrier`. +2. **The static overhead camera becomes uninformative.** With ±3 cm of variation, the overhead view is nearly identical across every demo, so it carries little signal about *where to push*. +3. **The policy takes the wrist-camera shortcut.** Lacking a discriminative overhead signal, the policy latches onto the moving wrist camera (which *does* vary with the action) — confirmed by the ablation (Δ 0.197 wrist vs. 0.038 overhead). +4. **Closed-loop covariate-shift spiral.** At inference the wrist view goes out-of-distribution as soon as the trajectory drifts; errors amplify and the policy regresses to a vision-free prior — skipping box-1 and driving straight at box-2. + +This is the *same* covariate-shift mechanism quantified by the barrier replan-frequency study, but the contact-rich push has no recovery margin, so it surfaces as total failure rather than a degraded success rate. + +### The fix (held, not yet run) + +Widen the push init DR to **±10–13 cm** (matching barrier's magnitude) so the static overhead camera regains discriminative power and the wrist-view coverage broadens. DR is plumbed through the `PUSH_BOX_DR` environment variable so collection, training, and eval stay consistent (`envs/tasks/push/rl/train_env.py`). The end-to-end pipeline (re-train PPO → re-collect demos → re-convert to LeRobot → re-train ACT) is packaged as a one-shot script: + +```bash +conda activate isaaclab +./scripts/fix_push_widen_dr.sh # widen DR, retrain teacher, recollect, retrain ACT +``` + +Because demonstrations are generated automatically by the RL teacher, re-collection is cheap. 
+ **The after-fix success number is `_(pending fix run)_`.** The expected verification (per `experiments/act_push_failure/`) is that the camera ablation *inverts* — blank-overhead Δ grows and blank-wrist Δ shrinks — indicating the policy has switched to the robust overhead camera. No after-fix numbers are reported until that run completes. + +--- + +## Reproducing the analysis + +The failure analysis is dataset-only (no simulator) and runs in the `lerobot` env in minutes: + +```bash +conda activate lerobot +bash experiments/act_push_failure/run_all.sh +``` + +Outputs land in `experiments/act_push_failure/results/`: +- `push_summary.json` / `barrier_summary.json` — teacher-forcing L1 + per-camera ablation Δ (the numbers in §3 and §4). +- `push_replay.log` / `barrier_replay.log` — replay logs. +- `images/` — dumped training frames (overhead + wrist). + +OOD spatial analysis (§2) and the camera-sensitivity plot (§3) are regenerated by `tools/viz/eval_ood.py` and `tools/viz/push_camera_sensitivity.py` respectively. diff --git a/docs/state-machine.md b/docs/state-machine.md new file mode 100644 index 0000000..aa02b65 --- /dev/null +++ b/docs/state-machine.md @@ -0,0 +1,27 @@ +# State-machine oracle (pick & barrier) + +Pick and barrier demonstrations are generated by a privileged **Warp GPU state machine** +(`controllers/pick_place_cube_sm.py`, class `PickAndPlaceSm`). The kernel `infer_state_machine` runs +all N parallel environments on the GPU, advancing each independently through 10 states. The output is +an `(N, 8)` action stream — 7-D IK-absolute end-effector pose + 1 gripper command — at ~50 Hz. 
+ +| State | Name | What it does | +|---|---|---| +| 0 | `REST` | hold at the initial pose (~0.2 s) | +| 1 | `APPROACH_ABOVE_OBJECT` | move to a point directly above the object | +| 2 | `APPROACH_OBJECT` | descend onto the object | +| 3 | `GRASP_OBJECT` | close the gripper | +| 4 | `LIFT_OBJECT` | lift to `lift_height` (raised for the barrier task) | +| 5 | `MOVE_TO_PLACE` | translate to above the place target | +| 6 | `PLACE` | descend and release | +| 7 | `LIFT_AFTER_PLACE` | retract upward to clear the placed object | +| 8 | `RETREAT` | return toward the rest pose | +| 9 | `DONE` | terminal; hold until the env resets | + +**Barrier via parameters, not new code.** The barrier task reuses the *same* state machine; clearing a +⅓-arm-height wall is achieved purely by raising `lift_height` / `approach_height` +(`envs/tasks/barrier/collect_env.py`) so the block travels horizontally above the barrier before +descending on the far side. Raise the barrier → raise those two heights to keep clearance. + +> Push uses no state machine — it is driven by a learned PPO teacher. See +> [architecture.md](architecture.md). 
diff --git a/envs/base/recorders.py b/envs/base/recorders.py index 0cc1416..ab354e2 100644 --- a/envs/base/recorders.py +++ b/envs/base/recorders.py @@ -22,7 +22,6 @@ from __future__ import annotations -from collections.abc import Sequence import torch diff --git a/envs/tasks/push/rl/action.py b/envs/tasks/push/rl/action.py index 1ace5bb..ca476fe 100644 --- a/envs/tasks/push/rl/action.py +++ b/envs/tasks/push/rl/action.py @@ -21,7 +21,6 @@ import torch from collections.abc import Sequence -from dataclasses import MISSING import isaaclab.utils.math as math_utils from isaaclab.utils import configclass diff --git a/envs/tasks/push/rl/train_env.py b/envs/tasks/push/rl/train_env.py index 568626e..6c44226 100644 --- a/envs/tasks/push/rl/train_env.py +++ b/envs/tasks/push/rl/train_env.py @@ -36,7 +36,6 @@ ) from envs.tasks.push.rl.action import PlanarPushActionCfg -from envs.tasks.push import PUSH_CORNER import envs.tasks.push.rl.mdp as pmdp diff --git a/eval/eval_runner.py b/eval/eval_runner.py index 1eb837c..552402b 100644 --- a/eval/eval_runner.py +++ b/eval/eval_runner.py @@ -291,7 +291,7 @@ def _blank_actions(self) -> torch.Tensor: @staticmethod def _print_header(): print(f"\n{'='*60}") - print(f" VLA Eval Harness — FrankaPickVLA") + print(" VLA Eval Harness — FrankaPickVLA") print(f"{'='*60}\n") print(f" {'Ep':>4} {'env':>3} {'result':>6} " f"{'steps':>6} {'success':>10} {'reason'}") diff --git a/eval/vla_wrapper.py b/eval/vla_wrapper.py index fe712e3..41a3ed5 100644 --- a/eval/vla_wrapper.py +++ b/eval/vla_wrapper.py @@ -262,7 +262,6 @@ def __init__( ) def predict_action(self, obs: dict) -> torch.Tensor: - import numpy as np import PIL.Image N = obs["joint_pos"].shape[0] @@ -287,7 +286,7 @@ def predict_action(self, obs: dict) -> torch.Tensor: return actions # (N, 8) @staticmethod - def _to_isaac_action(raw: "np.ndarray") -> torch.Tensor: + def _to_isaac_action(raw: "np.ndarray") -> torch.Tensor: # noqa: F821 (quoted forward-ref) """ 7-dim VLA output → 8-dim Isaac 
Lab IK-Abs. @@ -333,13 +332,13 @@ def __init__( place_pos: tuple[float, float, float], device: str = "cuda:0", oracle_pose: str = "gt", # "gt" | "noisy" - noise_cfg: "PoseNoiseCfg | None" = None, + noise_cfg: "PoseNoiseCfg | None" = None, # noqa: F821 (quoted forward-ref) seed: int | None = 0, ): if preset.make_sm is None: raise ValueError( - f"task preset 無 make_sm(push 走 RL 收集,無 SM oracle)→ " - f"SMOracleWrapper 目前僅支援 pick_place / barrier" + "task preset 無 make_sm(push 走 RL 收集,無 SM oracle)→ " + "SMOracleWrapper 目前僅支援 pick_place / barrier" ) from eval.perception.pose_noise import PoseNoiseCfg, sample_episode_offset diff --git a/experiments/act_push_failure/README.md b/experiments/act_push_failure/README.md index 4ae9481..9d00665 100644 --- a/experiments/act_push_failure/README.md +++ b/experiments/act_push_failure/README.md @@ -1,69 +1,79 @@ -# ACT Push 失效分析(可復現實驗包) +# ACT push failure — a reproducible root-cause analysis -> ACT 在 push 任務「**不推 box1、直接斜前奔 box2/桌角**」的根因分析。 -> 每個推論都對應一份可復現的證據。對照組:同 pipeline 但成功的 barrier 任務。 +> Why the push ACT policy **never touches box-1 and drives diagonally toward box-2 / the table corner**. +> Every claim below maps to a piece of reproducible evidence. Control group: the *same* pipeline on the +> barrier task, which succeeds. -## TL;DR 根因 -push policy **學成幾乎只依賴「隨手移動的 wrist 第一人稱相機」、幾乎不用靜態 overhead**。 -肇因是 **box1 init 隨機化只有 ±3cm(過窄)+ demo 短少**。 -closed-loop 時 wrist 視角一偏離 demo 就 OOD → covariate shift 螺旋 → 退回「不靠視覺、奔桌角」的 prior。 -barrier 因 DR 寬(±13/±7cm)、依賴穩健的 overhead,故同 pipeline 卻成功。 +## TL;DR — root cause ---- +The push policy learns to rely **almost entirely on the moving, first-person wrist camera and barely uses +the static overhead camera**. The upstream cause is **box-1 initial randomization of only ±3 cm (too narrow) +plus short demos**. 
In closed loop, the moment the wrist view drifts from the demos it is out-of-distribution +→ a covariate-shift spiral → the policy falls back to a vision-independent "run for the corner" prior. +Barrier, with wide randomization (±13/±7 cm) and reliance on the robust overhead camera, succeeds under the +identical pipeline. + +## How to reproduce -## 如何復現 ```bash -conda activate lerobot # 需 lerobot 0.5.2(與 eval 同 env) +conda activate lerobot # needs lerobot 0.5.2 (same env as eval) +bash scripts/bootstrap_assets.sh --minimal # fetch dataset + checkpoint if not already local bash experiments/act_push_failure/run_all.sh ``` -產出在 `results/`: + +Outputs land in `results/`: - `push_replay.log` / `push_summary.json` -- `barrier_replay.log` / `barrier_summary.json`(對照組) -- `images/push_{wrist,overhead}_*.png`(訓練影像) +- `barrier_replay.log` / `barrier_summary.json` (control group) +- `images/push_{wrist,overhead}_*.png` (training images) -> 純資料集 + checkpoint,**不需啟動 Isaac Sim**,數分鐘可跑完。 +> Pure dataset + checkpoint — **no simulator required**; finishes in a few minutes. 
---- +## Experiment ↔ inference ↔ evidence -## 實驗 ↔ 推論 ↔ 證據 對照表 +| Experiment | What it does | Evidence (push) | Conclusion | +|---|---|---|---| +| **E1 Teacher-forcing** | feed ground-truth demo obs frame-by-frame; compare predicted vs recorded action | EE-xy L1 = **0.011 m**; overall 0.0087 | ✅ pipeline correct, the model *did* learn → rules out version/normalizer mismatch, under-training, mean-collapse | +| **E2 Per-camera ablation** | black out overhead / wrist separately | black-wrist Δ = **0.197**; black-overhead Δ = **0.038** | push relies **heavily on wrist**; rules out causal confusion (it over-uses vision, not ignores it) | +| **E2 where it goes** | where the policy drives when a camera fails | black-wrist → EE stalls at **[0.64, 0.25]** (past box-2, toward the corner) | exactly reproduces the failure symptom → "wrist failure ⇒ observed behavior" | +| **Control: barrier** | same script on the successful barrier task | black-overhead Δ = **0.089**; black-wrist Δ = **0.027** | barrier relies on the **robust overhead** → explains why barrier succeeds and push fails | +| **E3 dump images** | dump training wrist/overhead frames | wrist frames are non-black, genuine first-person view | training images are fine; the wrist view moves with the EE → fragile viewpoint | -| 實驗 | 做什麼 | 證據(push) | 排除/支持的結論 | -|------|--------|-------------|----------------| -| **E1 Teacher-forcing** | 逐幀餵 demo 真值 obs → 比 pred vs 記錄 action | EE xy L1 = **0.011 m**、整體 0.0087 | ✅ pipeline 正確、模型有學會 → 排除 **版本/normalizer 不一致、欠訓、mean-collapse** | -| **E2 逐相機 ablation** | 分別塗黑 overhead / wrist | 黑 wrist Δ=**0.197**;黑 overhead Δ=**0.038** | push **重度依賴 wrist**;排除 **causal confusion(忽略影像)**(反而過度依賴影像)| -| **E2 黑影像走向** | 看影像失效時 policy 往哪 | 黑 wrist → EE 停在 **[0.64, 0.25]**(越過 box2 奔桌角)| 完美**複製失敗症狀** → 證明「wrist 失效 ⇒ 觀察到的行為」 | -| **對照組 barrier** | 同腳本跑成功的 barrier | 黑 overhead Δ=**0.090**;黑 wrist Δ=**0.027** | barrier 依賴**穩健 overhead** → 解釋為何 barrier 成功、push 失敗 | -| **E3 dump 影像** | dump 訓練 
wrist/overhead | wrist mean≈115(非黑)、為第一人稱視角 | 訓練影像正常;wrist 隨 EE 移動 → 視角脆弱 | +### Key numbers side by side (push vs barrier) -### 關鍵數字並排(push vs barrier) -| | 黑 overhead Δ | 黑 wrist Δ | 主要依賴 | eval 結果 | +| | black-overhead Δ | black-wrist Δ | primary reliance | eval result | |---|:---:|:---:|---|:---:| -| **push** | 0.038 | **0.197** | wrist(脆弱) | ❌ | -| **barrier** | **0.090** | 0.027 | overhead(穩健) | ✅ | - ---- - -## 為什麼 push 押 wrist、barrier 押 overhead(相機 config 完全相同) -| 因素 | push | barrier | -|------|------|---------| -| box init DR | **±3cm**(`envs/tasks/push/rl/train_env.py::PushEventCfg.reset_box1`)| **±13/±7cm**(`envs/tasks/barrier/collect_env.py::BARRIER_INIT_POSE_RANGE`)| -| demo 長度 × 數量 | ~40 frame × 50 | ~287 frame | -| 任務性質 | 低高度接觸推桿(wrist 貼近物體、與動作高度相關)| pick-place(overhead 看全局)| - -DR 太窄 → 靜態 overhead 視野在所有 demo 幾乎不變、無區辨力 → policy 改抓「會隨動作變化」的 wrist 當捷徑。 - ---- - -## 潛在解決方法(按 ROI) -1. **加大 push 收集 DR**:`PushEventCfg.reset_box1/reset_box2` 的 `pose_range` ±3cm → **±10~13cm**(對齊 barrier),用 RL expert 重收 demo(成本低)再重訓。 - → 已包成一鍵 pipeline:**`scripts/fix_push_widen_dr.sh`**(DR 由 `PUSH_BOX_DR` 環境變數控制;train/collect/eval 一致)。 - 先 `SPEEDRUN=1` 驗整條通,再跑完整版。跑完用本資料夾的 `act_camera_ablation.py` 驗證相機依賴是否反轉。 -2. **只留 overhead、拿掉 wrist** 重訓一版做快速驗證(若可跑 → 100% 坐實 wrist 依賴診斷)。 -3. 收更多、更長 demo;wrist 影像增強(crop/color jitter)。 -4. 
DAgger / 收集時加 action noise → 覆蓋偏離軌跡的 wrist 視角,直接對治 covariate shift。 - -## 檔案 -- `act_camera_ablation.py` — 主診斷(E1 teacher-forcing + E2 逐相機 ablation),輸出 JSON。 -- `dump_obs_images.py` — dump 訓練影像(E3)。 -- `run_all.sh` — 一鍵復現 push + barrier + 影像。 -- `results/` — 證據(log / json / png)。 +| **push** | 0.038 | **0.197** | wrist (fragile) | ❌ | +| **barrier** | **0.089** | 0.027 | overhead (robust) | ✅ | + +## Why push leans on wrist while barrier leans on overhead (identical camera config) + +| Factor | push | barrier | +|---|---|---| +| box init DR | **±3 cm** (`envs/tasks/push/rl/train_env.py`) | **±13/±7 cm** (`envs/tasks/barrier/collect_env.py`) | +| demo length × count | ~40 frames × 50 | ~287 frames | +| task nature | low-height contact push (wrist near the object, strongly action-correlated) | pick-place (overhead sees the whole workspace) | + +Too-narrow DR → the static overhead view barely changes across demos → no discriminative signal → the policy +latches onto the wrist camera (which *does* change with the action) as a shortcut. + +## Potential fixes (by ROI) + +1. **Widen push-collection DR**: raise the box init `pose_range` from ±3 cm to **±10–13 cm** (aligning with + barrier), re-collect demos with the RL expert (cheap), and retrain. Packaged as one command — + **[`scripts/fix_push_widen_dr.sh`](../../scripts/fix_push_widen_dr.sh)** (DR via the `PUSH_BOX_DR` env var; + train / collect / eval stay consistent). Validate by re-running `act_camera_ablation.py` and checking that + the camera reliance flips. +2. **Force the robust view**: retrain overhead-only (drop wrist) as a fast confirmation of the diagnosis. +3. Collect more / longer demos; augment the wrist image (crop / color jitter). +4. **DAgger / action-noise during collection** → cover off-trajectory wrist views, directly attacking the + covariate shift. + +## Files + +- `act_camera_ablation.py` — the main diagnostic (E1 teacher-forcing + E2 per-camera ablation); writes JSON. 
+- `dump_obs_images.py` — dump training images (E3). +- `run_all.sh` — one-command reproduction (push + barrier control + images). +- `results/` — the evidence (logs / JSON / PNGs). + +> A narrative write-up of this analysis (with the before→after fix) lives in +> [`docs/case-study-push.md`](../../docs/case-study-push.md). diff --git a/experiments/act_push_failure/REPORT_SECTION.md b/experiments/act_push_failure/REPORT_SECTION.md deleted file mode 100644 index 50caefe..0000000 --- a/experiments/act_push_failure/REPORT_SECTION.md +++ /dev/null @@ -1,50 +0,0 @@ -# 報告段落草稿:ACT Push 失效分析 - -> 可直接貼進報告。建議放在 Results/Challenges 章節。引用的數字皆出自 -> `experiments/act_push_failure/results/`,可復現。 - ---- - -## Challenge: Camera-Reliance Shortcut in ACT Push Policy - -### Problem(問題) -以 RL expert 蒐集的 demo 訓練 ACT 執行 push 任務(將 box1 推向 box2、再把 box2 推到桌角)時, -學成的 policy **完全無法執行**:機械臂不去接觸 box1,而是從第一步就直接朝斜前方的 box2/桌角移動。 -相同的感知與訓練 pipeline 在 barrier(越障 pick-and-place)任務上卻能正常運作。 - -### Root Cause(根因) -push policy **學成幾乎只依賴隨末端執行器移動的 wrist(第一人稱)相機,而幾乎不使用靜態的 overhead 相機**。 -wrist 視角與動作高度耦合,在訓練分佈內極易擬合(low loss),但在 closed-loop 推論時, -末端軌跡一旦稍微偏離 demo,wrist 看到的就是訓練中未見過的視角(out-of-distribution), -導致動作誤差被放大、形成 covariate-shift 螺旋,最終退化成「不依賴視覺、直奔桌角」的先驗行為。 - -此相機依賴偏差源於資料分佈:push 的 box 初始隨機化僅 **±3 cm**,使靜態 overhead 相機的畫面 -在所有 demo 中幾乎不變、對「往哪推」缺乏區辨力,policy 因而轉向「會隨動作變化」的 wrist 相機作為捷徑。 -相對地,barrier 的初始隨機化達 **±13 / ±7 cm**,迫使 policy 使用視角穩健的 overhead 相機,故得以泛化成功。 - -### Analysis & Evidence(分析與證據) -分析以兩個純資料集實驗完成(不需模擬器),並以成功的 barrier 任務作為對照: - -1. **Teacher-forcing replay**:逐幀餵入 demo 的真值觀測,模型重現記錄動作的末端位置誤差僅 - **0.011 m**(整體 L1 0.0087)。證明推論 pipeline 正確、模型確實學會了動作, - 排除了版本/正規化不一致、欠訓、mean-collapse 等假設。 -2. 
**逐相機影像 ablation**:對同一幀分別將相機影像塗黑,量測預測位移—— - - | 任務 | 塗黑 overhead | 塗黑 wrist | 主要依賴 | eval 結果 | - |------|:---:|:---:|---|:---:| - | push | 0.038 | **0.197** | wrist(脆弱) | 失敗 | - | barrier | **0.090** | 0.027 | overhead(穩健) | 成功 | - - 塗黑 wrist 時,push policy 的末端目標退化到 **[0.64, 0.25]**(越過 box2 奔向桌角), - 與實際失效行為完全一致,直接證明「wrist 失效 ⇒ 觀察到的症狀」。 - -綜合:失效並非來自模型容量、資料品質或推論實作,而是 **policy 對脆弱的 egocentric 相機形成依賴捷徑**, -其上游成因為**過窄的初始狀態隨機化**。 - -### Potential Solutions(潛在解決方法) -1. **加大初始隨機化**:將 push 的 box 初始 DR 由 ±3 cm 提高至 ±10–13 cm(對齊 barrier), - 使 overhead 相機重獲區辨力、並擴大 wrist 視角覆蓋;由於 demo 由 RL expert 自動產生,重蒐集成本極低。 -2. **強制使用穩健視角**:移除 wrist、僅保留 overhead 重訓作為快速驗證;或對 wrist 影像做資料增強。 -3. **覆蓋離分佈狀態**:以 DAgger 或在蒐集時注入動作雜訊,讓 demo 涵蓋偏離軌跡的視角,直接緩解 covariate shift。 - -> 復現:`bash experiments/act_push_failure/run_all.sh`(詳見該資料夾 README)。 diff --git a/experiments/act_push_failure/results/barrier_summary.json b/experiments/act_push_failure/results/barrier_summary.json index b8aa749..ff4c6a4 100644 --- a/experiments/act_push_failure/results/barrier_summary.json +++ b/experiments/act_push_failure/results/barrier_summary.json @@ -3,7 +3,7 @@ "repo_id": "franka_barrier_v1", "episode": 0, "n_frames": 287, - "model_path": "/home/kevin786/Workspace/Project/Isaac_sim/_out/act/act_franka_barrier_v1_run_20260613_170049/checkpoints/last/pretrained_model", + "model_path": "_out/act/act_franka_barrier_v1/checkpoints/last/pretrained_model", "E1_teacher_forcing": { "mean_l1_all": 0.006865891627967358, "mean_l1_ee_xy": 0.006371738389134407, diff --git a/experiments/act_push_failure/results/push_summary.json b/experiments/act_push_failure/results/push_summary.json index c39df00..a89ed92 100644 --- a/experiments/act_push_failure/results/push_summary.json +++ b/experiments/act_push_failure/results/push_summary.json @@ -3,7 +3,7 @@ "repo_id": "franka_push_rl_v1", "episode": 0, "n_frames": 40, - "model_path": 
"/home/kevin786/Workspace/Project/Isaac_sim/_out/act/act_franka_push_rl_v1_run_20260613_170903/checkpoints/last/pretrained_model", + "model_path": "_out/act/act_franka_push_rl_v1/checkpoints/last/pretrained_model", "E1_teacher_forcing": { "mean_l1_all": 0.008742369711399078, "mean_l1_ee_xy": 0.01095487643033266, diff --git a/experiments/act_push_failure/run_all.sh b/experiments/act_push_failure/run_all.sh index b0fad89..42daa91 100644 --- a/experiments/act_push_failure/run_all.sh +++ b/experiments/act_push_failure/run_all.sh @@ -16,10 +16,17 @@ PROJ="$(cd "$HERE/../.." && pwd)" # Isaac_sim 專案根 RES="$HERE/results" mkdir -p "$RES/images" +# Resolve an ACT checkpoint by tag: prefer the canonical bootstrap path +# (_out/act/act_/...), else the newest timestamped training run. +resolve_ck() { # $1 = tag + local canon="$PROJ/_out/act/act_$1/checkpoints/last/pretrained_model" + if [ -d "$canon" ]; then echo "$canon"; return; fi + ls -d "$PROJ"/_out/act/act_"$1"_run_*/checkpoints/last/pretrained_model 2>/dev/null | sort | tail -1 +} PUSH_DS="$PROJ/_out/datasets/lerobot/franka_push_rl_v1" -PUSH_CK="$PROJ/_out/act/act_franka_push_rl_v1_run_20260613_170903/checkpoints/last/pretrained_model" +PUSH_CK="$(resolve_ck franka_push_rl_v1)" BAR_DS="$PROJ/_out/datasets/lerobot/franka_barrier_v1" -BAR_CK="$PROJ/_out/act/act_franka_barrier_v1_run_20260613_170049/checkpoints/last/pretrained_model" +BAR_CK="$(resolve_ck franka_barrier_v1)" echo "######## (E1+E2) PUSH ########" python "$HERE/act_camera_ablation.py" --tag push \ diff --git a/pyproject.toml b/pyproject.toml index b03d563..fe440e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,25 +3,38 @@ requires = ["setuptools>=61"] build-backend = "setuptools.build_meta" [project] -name = "isaac-sim-corvinus" +name = "sim2act" version = "0.1.0" -description = "Franka pick / push / barrier VLA demo-collection + eval pipeline on Isaac Lab" +description = "sim2act — a VLA simulation data engine: Franka pick / push / barrier demo 
collection, LeRobot v3.0 conversion, ACT training + closed-loop eval on Isaac Lab" requires-python = ">=3.10" -# 註:isaaclab / isaacsim / warp / torch 由 Isaac Lab 環境提供,不在此宣告。 -# 以下為純資料管線(在 isaaclab python 內)會用到的輕量相依。 +# Note: isaaclab / isaacsim / warp / torch are provided by the Isaac Lab environment and are +# intentionally NOT declared here. Below are the lightweight deps the pure data pipeline needs +# (importable from inside the isaaclab python). dependencies = [ "h5py", "numpy", ] [project.optional-dependencies] -# LeRobot v3.0 轉換(通常在獨立的 lerobot conda env 執行) +# LeRobot v3.0 conversion + ACT training (usually run in a separate `lerobot` conda env). lerobot = ["pandas", "pyarrow", "imageio[ffmpeg]"] -# 點雲視覺化工具 +# Point-cloud visualization tooling. viz = ["open3d"] +# Asset bootstrap (download datasets/checkpoints from the Hugging Face Hub). +hub = ["huggingface_hub"] [tool.setuptools.packages.find] -# 自動探索(含子套件 envs.base / envs.tasks.push.rl / envs.scenes ...)。 -# 安裝後 `from envs...` / `from eval...` 等不再需要 sys.path hack; -# 之後新增 task 子資料夾會被自動收錄,不必再手動維護清單。 -include = ["envs*", "controllers*", "eval*", "data*", "experimental*"] +# Auto-discover (incl. sub-packages envs.base / envs.tasks.push.rl / envs.scenes ...). +# After `pip install -e .`, `from envs...` / `from eval...` no longer need a sys.path hack; +# new task sub-folders are picked up automatically — no manual list to maintain. +include = ["envs*", "controllers*", "eval*", "data*"] + +[tool.ruff] +line-length = 120 +target-version = "py310" + +[tool.ruff.lint] +# Light but real: catch genuine bugs (pyflakes F) and syntax errors (E9) without +# reformatting research code or churning on whitespace/line-length. 
+select = ["E9", "F"] +ignore = [] diff --git a/scripts/bootstrap_assets.sh b/scripts/bootstrap_assets.sh new file mode 100644 index 0000000..f885361 --- /dev/null +++ b/scripts/bootstrap_assets.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# scripts/bootstrap_assets.sh +# ────────────────────────────────────────────────────────────────────────────── +# Download the datasets + checkpoints needed to run sim2act without regenerating +# them in-sim. Everything lands under _out/ (gitignored). +# +# Requires the `hub` extra (provides the `hf` CLI / huggingface_hub): +# pip install -e ".[hub]" +# +# Usage: +# bash scripts/bootstrap_assets.sh --minimal # just enough for the no-sim failure analysis +# bash scripts/bootstrap_assets.sh --all # all published datasets + checkpoints +# +# Env: +# HF_NAMESPACE Hugging Face user/org hosting the assets (default: CLM0215) +# ────────────────────────────────────────────────────────────────────────────── +set -euo pipefail +REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$REPO" + +HF_NAMESPACE="${HF_NAMESPACE:-CLM0215}" +MODE="${1:---minimal}" + +command -v hf >/dev/null 2>&1 || { echo "❌ 'hf' not found. 
Install: pip install -e \".[hub]\""; exit 1; } + +dl() { # $1 = repo name $2 = dataset|model $3 = local dest dir + echo "↓ $2 ${HF_NAMESPACE}/$1 → $3" + hf download "${HF_NAMESPACE}/$1" --repo-type "$2" --local-dir "$3" >/dev/null +} + +echo "Bootstrapping assets (namespace: ${HF_NAMESPACE}, mode: ${MODE})" + +# ── minimal: the simulator-free failure analysis (experiments/act_push_failure/run_all.sh) ── +dl franka-push-rl-v1 dataset _out/datasets/lerobot/franka_push_rl_v1 +dl act-push-rl-v1 model _out/act/act_franka_push_rl_v1/checkpoints/last/pretrained_model +dl franka-barrier-v1 dataset _out/datasets/lerobot/franka_barrier_v1 +dl act-barrier-v1 model _out/act/act_franka_barrier_v1/checkpoints/last/pretrained_model + +if [ "$MODE" = "--all" ]; then + dl franka-pick-place dataset _out/datasets/lerobot/frank_pick_place + dl ppo-push-teacher model _out/rl/franka_push/exported + # (push v2 / after-fix assets are added here once the fix run is published.) +fi + +echo "✅ Bootstrap complete." 
+echo " Next: conda activate lerobot && bash experiments/act_push_failure/run_all.sh" diff --git a/scripts/collect/demos.py b/scripts/collect/demos.py index 5ddc176..3f31f61 100644 --- a/scripts/collect/demos.py +++ b/scripts/collect/demos.py @@ -175,7 +175,7 @@ def main(): # ── 完成 ────────────────────────────────────────────────────────────────── total_recorded = env.unwrapped.recorder_manager.exported_successful_episode_count print(f"\n{'='*55}") - print(f"[collect_demos] 完成!") + print("[collect_demos] 完成!") print(f" 成功 demo: {total_recorded}") print(f" 總 episodes: {n_episodes}") print(f" 成功率: {total_recorded / max(n_episodes, 1):.1%}") diff --git a/scripts/eval/oracle_noise_sweep.py b/scripts/eval/oracle_noise_sweep.py index da34101..ab8ca24 100644 --- a/scripts/eval/oracle_noise_sweep.py +++ b/scripts/eval/oracle_noise_sweep.py @@ -8,7 +8,7 @@ # 每個 σ 重建一個 SMOracleWrapper(新 SM + 新 per-episode 偏移),跑 EvalRunner。 # # 用法: -# conda activate isaaclab && cd /home/kevin786/Workspace/Project/Isaac_sim +# conda activate isaaclab && cd /path/to/sim2act # python scripts/eval/oracle_noise_sweep.py --task barrier \ # --sweep_axis xy --num_rollouts 40 --num_envs 4 --headless --enable_cameras # @@ -150,7 +150,7 @@ def main(): print(f"\n{'='*60}") print(f" Sweep done → {stem}.csv / .json") - print(f" σ(mm) → success:") + print(" σ(mm) → success:") for r in rows: print(f" {r['sigma_m']*1000:5.1f} {r['success_rate']:.1%}") print(f"{'='*60}\n") diff --git a/scripts/eval/policy.py b/scripts/eval/policy.py index 8d94fba..ca60341 100644 --- a/scripts/eval/policy.py +++ b/scripts/eval/policy.py @@ -136,7 +136,7 @@ from envs.tasks.task_presets import get_preset # per-task eval 場景 + 成功規格(單一事實來源) -def _make_policy(args, preset=None, dt=None, num_envs=None, place_pos=None) -> "VLAWrapper": +def _make_policy(args, preset=None, dt=None, num_envs=None, place_pos=None) -> "VLAWrapper": # noqa: F821 if args.policy == "dummy": return DummyVLAWrapper(device=args.device) diff --git 
a/scripts/eval/record_demos.sh b/scripts/eval/record_demos.sh index 148be8a..3939518 100755 --- a/scripts/eval/record_demos.sh +++ b/scripts/eval/record_demos.sh @@ -23,7 +23,7 @@ # ────────────────────────────────────────────────────────────────────────────── set -euo pipefail -REPO="/home/kevin786/Workspace/Project/Isaac_sim" +REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" # repo root (this script lives in scripts/eval/) cd "$REPO" CKPT="${1:-${CKPT:-}}" diff --git a/scripts/fix_push_widen_dr.sh b/scripts/fix_push_widen_dr.sh index 307f14d..b885af1 100755 --- a/scripts/fix_push_widen_dr.sh +++ b/scripts/fix_push_widen_dr.sh @@ -34,7 +34,7 @@ # ────────────────────────────────────────────────────────────────────────────── set -euo pipefail -REPO="/home/kevin786/Workspace/Project/Isaac_sim" +REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # repo root (this script lives in scripts/) cd "$REPO" TAG="${TAG:-franka_push_v2_dr12}" diff --git a/scripts/recollect_retrain_barrier.sh b/scripts/recollect_retrain_barrier.sh index f9f8078..c6f670e 100755 --- a/scripts/recollect_retrain_barrier.sh +++ b/scripts/recollect_retrain_barrier.sh @@ -25,7 +25,7 @@ # ────────────────────────────────────────────────────────────────────────────── set -euo pipefail -REPO="/home/kevin786/Workspace/Project/Isaac_sim" +REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" # repo root (this script lives in scripts/) cd "$REPO" TAG="${TAG:-franka_barrier_v2}" diff --git a/scripts/rl/play_push.py b/scripts/rl/play_push.py index b06b61b..d2a4d4a 100644 --- a/scripts/rl/play_push.py +++ b/scripts/rl/play_push.py @@ -36,7 +36,6 @@ from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper, handle_deprecated_rsl_rl_cfg from envs.tasks.push.rl.ppo_cfg import PushPPORunnerCfg -import envs.tasks.push.rl.mdp as pmdp def _latest_checkpoint() -> str: diff --git a/tools/checks/hdf5_data.py b/tools/checks/hdf5_data.py index 7de74cb..0abe2f9 100644 --- a/tools/checks/hdf5_data.py +++ b/tools/checks/hdf5_data.py @@ -23,7 +23,7 @@ # ── structured obs ──────────────────────────────────────────── if "obs" in demo: obs = demo["obs"] - print(f" obs/") + print(" obs/") for obs_key in sorted(obs.keys()): item = obs[obs_key] print(f" {obs_key:<16}: {item.shape} {item.dtype}") @@ -56,7 +56,7 @@ status = "✅" if np.abs(tq).max() > 1e-3 else "⚠️ 全零" print(f" joint_torque : |τ|max={np.abs(tq).max():.3f} {status}") else: - print(f" obs: ❌ 缺少") + print(" obs: ❌ 缺少") # ── states ──────────────────────────────────────────────────── if "states" in demo: diff --git a/tools/viz/push_camera_sensitivity.py b/tools/viz/push_camera_sensitivity.py index e11eacc..675ba39 100644 --- a/tools/viz/push_camera_sensitivity.py +++ b/tools/viz/push_camera_sensitivity.py @@ -6,7 +6,7 @@ # Usage: # python tools/viz/push_camera_sensitivity.py --model_path /pretrained_model \ # --hdf5 _out/datasets/franka_push_rl_demos_clean/dataset.hdf5 --out -import argparse, glob +import argparse import numpy as np import torch