Skip to content

Commit 53f96c8

Browse files
committed
Add honest Franka stack instance-randomize slice
1 parent 5f25c59 commit 53f96c8

26 files changed

Lines changed: 1520 additions & 61 deletions

.github/workflows/mlx-macos.yml

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,8 @@ jobs:
242242
set_runtime_selection(resolve_runtime_selection(compute_backend="mlx", sim_backend="mac-sim", device="cpu"))
243243
rough_cfg = parse_env_cfg("Isaac-Velocity-Rough-H1-v0", device="cpu", num_envs=4)
244244
stack_cfg = parse_env_cfg("Isaac-Stack-Cube-RedGreenBlue-Franka-IK-Rel-v0", device="cpu", num_envs=4)
245+
stack_instance_cfg = parse_env_cfg("Isaac-Stack-Cube-Instance-Randomize-Franka-v0", device="cpu", num_envs=4)
246+
teddy_cfg = parse_env_cfg("Isaac-Lift-Teddy-Bear-Franka-IK-Abs-v0", device="cpu", num_envs=4)
245247
payload = evaluate_mlx_task(
246248
"h1-rough",
247249
num_envs=4,
@@ -254,6 +256,8 @@ jobs:
254256
artifact = {
255257
"rough_cfg_type": type(rough_cfg).__name__,
256258
"stack_cfg_type": type(stack_cfg).__name__,
259+
"stack_instance_cfg_type": type(stack_instance_cfg).__name__,
260+
"teddy_cfg_type": type(teddy_cfg).__name__,
257261
"trainable_tasks": list(list_trainable_mlx_tasks()),
258262
"eval_task": payload["task"],
259263
"episodes_completed": payload["episodes_completed"],
@@ -269,6 +273,14 @@ jobs:
269273
--task h1-rough \
270274
--num-envs 4 --episodes 1 --episode-length-s 0.25 --max-steps 128 --no-random-actions \
271275
--json-out logs/runtime/release-eval.json
276+
"$release_root/.venv-release/bin/isaaclab-mlx" evaluate \
277+
--task franka-teddy-bear-lift \
278+
--num-envs 4 --episodes 1 --episode-length-s 0.25 --max-steps 128 --no-random-actions \
279+
--json-out logs/runtime/release-teddy-eval.json
280+
"$release_root/.venv-release/bin/isaaclab-mlx" evaluate \
281+
--task franka-stack-instance-randomize \
282+
--num-envs 4 --episodes 1 --episode-length-s 0.25 --max-steps 128 --no-random-actions \
283+
--json-out logs/runtime/release-stack-instance-eval.json
272284
"$release_root/.venv-release/bin/isaaclab-mlx" train \
273285
--task cartpole \
274286
--num-envs 4 --updates 1 --rollout-steps 8 --epochs-per-update 1 --episode-length-s 0.25 \
@@ -309,6 +321,8 @@ jobs:
309321
source/isaaclab/test/backends/test_mac_anymal_c_rough.py \
310322
source/isaaclab/test/backends/test_mac_franka_reach.py \
311323
source/isaaclab/test/backends/test_mac_franka_lift.py \
324+
source/isaaclab/test/backends/test_mac_franka_teddy_bear_lift.py \
325+
source/isaaclab/test/backends/test_mac_franka_stack_instance_randomize.py \
312326
source/isaaclab/test/backends/test_mac_franka_stack.py \
313327
source/isaaclab/test/backends/test_mac_franka_stack_rgb.py \
314328
source/isaaclab/test/backends/test_mac_franka_cabinet.py \
@@ -417,6 +431,36 @@ jobs:
417431
--num-envs 8 --updates 1 --rollout-steps 8 --epochs-per-update 1 --episode-length-s 0.5 \
418432
--checkpoint logs/mlx/franka_lift_policy.npz --eval-interval 1
419433
434+
- name: Run MLX Franka teddy-bear lift smoke
435+
run: |
436+
PYTHONPATH=.:source/isaaclab .venv/bin/python \
437+
scripts/reinforcement_learning/mlx/evaluate_task.py \
438+
--task franka-teddy-bear-lift \
439+
--num-envs 8 --episodes 1 --episode-length-s 0.5 --max-steps 512 --no-random-actions
440+
441+
- name: Run MLX Franka teddy-bear lift training smoke
442+
run: |
443+
PYTHONPATH=.:source/isaaclab .venv/bin/python \
444+
scripts/reinforcement_learning/mlx/train_task.py \
445+
--task franka-teddy-bear-lift \
446+
--num-envs 8 --updates 1 --rollout-steps 8 --epochs-per-update 1 --episode-length-s 0.5 \
447+
--checkpoint logs/mlx/franka_teddy_bear_lift_policy.npz --eval-interval 1
448+
449+
- name: Run MLX Franka stack instance-randomize smoke
450+
run: |
451+
PYTHONPATH=.:source/isaaclab .venv/bin/python \
452+
scripts/reinforcement_learning/mlx/evaluate_task.py \
453+
--task franka-stack-instance-randomize \
454+
--num-envs 8 --episodes 1 --episode-length-s 0.5 --max-steps 512 --no-random-actions
455+
456+
- name: Run MLX Franka stack instance-randomize training smoke
457+
run: |
458+
PYTHONPATH=.:source/isaaclab .venv/bin/python \
459+
scripts/reinforcement_learning/mlx/train_task.py \
460+
--task franka-stack-instance-randomize \
461+
--num-envs 8 --updates 1 --rollout-steps 8 --epochs-per-update 1 --episode-length-s 0.5 \
462+
--checkpoint logs/mlx/franka_stack_instance_randomize_policy.npz --eval-interval 1
463+
420464
- name: Run MLX Franka stack smoke
421465
run: |
422466
PYTHONPATH=.:source/isaaclab .venv/bin/python \
@@ -491,6 +535,8 @@ jobs:
491535
assert "cartpole-depth-camera" in list_mlx_tasks()
492536
assert "franka-reach" in list_mlx_tasks()
493537
assert "franka-lift" in list_mlx_tasks()
538+
assert "franka-teddy-bear-lift" in list_mlx_tasks()
539+
assert "franka-stack-instance-randomize" in list_mlx_tasks()
494540
assert "franka-stack" in list_mlx_tasks()
495541
assert "franka-stack-rgb" in list_mlx_tasks()
496542
assert "franka-cabinet" in list_mlx_tasks()
@@ -653,10 +699,10 @@ jobs:
653699
assert len(payload["benchmarks"]) >= 5
654700
print(payload["tasks"])
655701
dashboard = json.loads(Path("logs/benchmarks/mlx/smoke-dashboard.json").read_text(encoding="utf-8"))
656-
assert dashboard["summary"]["rollout_task_count"] == 13
702+
assert dashboard["summary"]["rollout_task_count"] == 15
657703
assert dashboard["summary"]["training_task_count"] == 0
658704
trend = json.loads(Path("logs/benchmarks/mlx/smoke-trend.json").read_text(encoding="utf-8"))
659-
assert trend["summary"]["task_count"] == 13
705+
assert trend["summary"]["task_count"] == 15
660706
planner_payload = json.loads(Path("logs/planner/mac-planner-smoke.json").read_text(encoding="utf-8"))
661707
assert planner_payload["planner"]["implementation"] == "joint-space-linear-interpolation"
662708
assert planner_payload["plan"]["waypoint_count"] == 6
@@ -673,8 +719,8 @@ jobs:
673719
assert stereo_payload["depth_mm_mean"] is not None
674720
assert Path(stereo_payload["left_rgb_path"]).is_file()
675721
runtime_payload = json.loads(Path("logs/runtime/mac-runtime-diagnostics.json").read_text(encoding="utf-8"))
676-
assert runtime_payload["runtime"]["supported_tasks"]["public_task_count"] >= 15
677-
assert runtime_payload["sim"]["supported_tasks"]["current_mac_native_count"] >= 13
722+
assert runtime_payload["runtime"]["supported_tasks"]["public_task_count"] == 17
723+
assert runtime_payload["sim"]["supported_tasks"]["current_mac_native_count"] == 15
678724
sensor_payload = json.loads(Path("logs/benchmarks/mlx/sensor-smoke.json").read_text(encoding="utf-8"))
679725
assert sensor_payload["task_group"] == "sensor-mac-native"
680726
assert sensor_payload["cpu_fallback_detected"] is False
@@ -697,22 +743,34 @@ jobs:
697743
release_payload = json.loads(Path("logs/benchmarks/mlx/release_install_smoke.json").read_text(encoding="utf-8"))
698744
assert release_payload["rough_cfg_type"] == "MacH1RoughEnvCfg"
699745
assert release_payload["stack_cfg_type"] == "MacFrankaStackRgbEnvCfg"
746+
assert release_payload["stack_instance_cfg_type"] == "MacFrankaStackInstanceRandomizeEnvCfg"
747+
assert release_payload["teddy_cfg_type"] == "MacFrankaTeddyBearLiftEnvCfg"
700748
assert "anymal-c-rough" in release_payload["trainable_tasks"]
701749
assert "h1-rough" in release_payload["trainable_tasks"]
750+
assert "franka-teddy-bear-lift" in release_payload["trainable_tasks"]
751+
assert "franka-stack-instance-randomize" in release_payload["trainable_tasks"]
702752
assert release_payload["eval_task"] == "h1-rough"
703753
assert release_payload["episodes_completed"] == 1
704754
release_runtime_payload = json.loads(Path("logs/runtime/release-runtime-diagnostics.json").read_text(encoding="utf-8"))
705-
assert release_runtime_payload["runtime"]["supported_tasks"]["public_task_count"] >= 15
755+
assert release_runtime_payload["runtime"]["supported_tasks"]["public_task_count"] == 17
706756
release_eval_payload = json.loads(Path("logs/runtime/release-eval.json").read_text(encoding="utf-8"))
707757
assert release_eval_payload["task"] == "h1-rough"
708758
assert release_eval_payload["episodes_completed"] == 1
759+
release_teddy_eval_payload = json.loads(Path("logs/runtime/release-teddy-eval.json").read_text(encoding="utf-8"))
760+
assert release_teddy_eval_payload["task"] == "franka-teddy-bear-lift"
761+
assert release_teddy_eval_payload["episodes_completed"] == 1
762+
release_stack_instance_eval_payload = json.loads(
763+
Path("logs/runtime/release-stack-instance-eval.json").read_text(encoding="utf-8")
764+
)
765+
assert release_stack_instance_eval_payload["task"] == "franka-stack-instance-randomize"
766+
assert release_stack_instance_eval_payload["episodes_completed"] == 1
709767
release_train_payload = json.loads(Path("logs/runtime/release-train.json").read_text(encoding="utf-8"))
710768
assert release_train_payload["task"] == "cartpole"
711769
assert Path(release_train_payload["checkpoint_path"]).exists()
712770
full_payload = json.loads(Path("logs/benchmarks/mlx/full-smoke.json").read_text(encoding="utf-8"))
713771
assert full_payload["task_group"] == "full"
714772
full_dashboard = json.loads(Path("logs/benchmarks/mlx/full-smoke-dashboard.json").read_text(encoding="utf-8"))
715-
assert full_dashboard["summary"]["rollout_task_count"] == 17
773+
assert full_dashboard["summary"]["rollout_task_count"] == 19
716774
assert full_dashboard["summary"]["training_task_count"] == 1
717775
full_trend = json.loads(Path("logs/benchmarks/mlx/full-smoke-trend.json").read_text(encoding="utf-8"))
718776
assert any(entry["kind"] == "training" for entry in full_trend["tasks"])

PORT_TO_MLX_TODO.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ without pausing for replanning after every small success.
9797
- `DONE` Stereo/depth smoke now validates raw capture artifacts before processing and writes a machine-checkable JSON summary artifact
9898
- `DONE` `uv run scripts/bootstrap_uv_mlx.py` now bootstraps the public MLX/mac editable environment in one command
9999
- `DONE` Upstream-compatible Franka reach/stack/open-drawer controller variants now resolve to the canonical mac-native manipulation slices through the lazy task registry, public MLX wrapper, and installed CLI without aliasing heavier visuomotor or blueprint task families
100-
- `DONE` Upstream-compatible Franka lift IK variants now resolve to the canonical mac-native lift slice, while teddy-bear lift and richer Franka stack visuomotor/cosmos/blueprint/skillgen/bin-mimic families remain discoverable through explicit `sim-backend=isaacsim` gating on mac
100+
- `DONE` Upstream-compatible Franka lift IK variants now resolve to the canonical mac-native lift slice, while richer Franka stack visuomotor/cosmos/blueprint/skillgen/bin-mimic families remain discoverable through explicit `sim-backend=isaacsim` gating on mac
101+
- `DONE` Seventh trainable Franka manipulation slice landed for `Isaac-Lift-Teddy-Bear-Franka-IK-Abs-v0` by reusing the shared analytic lift substrate instead of over-claiming a new simulator family; it ships with public MLX wrapper/CLI support, benchmark coverage, a semantic baseline refresh, and focused backend tests
102+
- `DONE` Eighth trainable Franka manipulation slice landed for `Isaac-Stack-Cube-Instance-Randomize-Franka-v0` by keeping the shared analytic stack substrate but adding explicit variant-id observations and deterministic distinct support/movable object sampling instead of dishonestly aliasing it to plain stack; it ships with public MLX wrapper/CLI support, benchmark coverage, a semantic baseline refresh, and focused backend tests
101103

102104
## Phase A: Import And Packaging Safety
103105

@@ -750,12 +752,12 @@ without pausing for replanning after every small success.
750752
This queue exists so work can continue without waiting for a new plan. The documented v1 board above is now closed for the current public MLX/mac slice, so the next queue is follow-on parity work:
751753

752754
- Hardware validation is now done for the backend-local stereo path against live ZED 2i capture through a camera-authorized Terminal host plus `zed-sdk-mlx`; retained host-local probe artifacts include `/tmp/isaaclab-zed-probe-live-final.json` and `/tmp/isaaclab-zed-probe-live-final.yuv`.
753-
- Port the next manipulation milestone beyond the current six trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
755+
- Port the next manipulation milestone beyond the current eight trainable Franka slices, likely a richer cabinet/drawer variant or the next multi-object manipulation workflow.
754756
- Replace the next remaining locomotion or contact/support `mx.compile` helper with a true custom Metal kernel only after the root-step tranche proves benchmark-positive and semantically stable.
755757
- Grow the planner/ROS prototypes carefully: richer process/message interoperability layers around the new world-state and joint-trajectory envelopes while still avoiding CUDA/NITROS assumptions.
756758
- Keep the generic runtime metadata honest: only advertise generic sensor/runtime capabilities that are actually exposed through backend-neutral APIs, and push task-specific or tooling-only support into explicit diagnostic fields instead of broad parity flags.
757759
- Keep manipulation compatibility aliasing honest: widen upstream task-ID coverage only where the reduced mac-native slice still matches the observation/action/checkpoint contract, and keep heavier visuomotor / blueprint / skillgen families explicitly gated instead of quietly remapping them.
758-
- The next manipulation milestone should be a genuinely new reduced mac-native task, not more aliasing. The honest alias/gating boundary for the current Franka family is now in place.
760+
- The next manipulation milestone should again be a genuinely new task. The honest alias/gating boundary for the current Franka family now includes the separate instance-randomized stack slice, so further progress should come from another reduced mac-native task rather than from re-aliasing richer upstream families.
759761

760762
## Validation Commands
761763

0 commit comments

Comments
 (0)