From 076a27ab5490e8e10b86d473a14c70544b428d9d Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Tue, 2 Dec 2025 17:22:11 -0800
Subject: [PATCH 01/37] Update pick_place configs and tasks

---
 .gitignore                                     |  3 +++
 .../rl/fast_td3/configs/pick_place.yaml        |  2 +-
 roboverse_learn/rl/fast_td3/configs/track.yaml |  4 ++--
 roboverse_pack/tasks/pick_place/README.md      | 15 +++++++++------
 roboverse_pack/tasks/pick_place/track.py       |  2 +-
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index a6e1e0bf5..faaacc0b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,3 +104,6 @@ id_rsa*
 
 # for test cache
 .pytest_cache/
+
+*.pkl
+*.pt
\ No newline at end of file
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place.yaml
index 004f1f2d2..065d7cd8c 100644
--- a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place.yaml
@@ -4,7 +4,7 @@
 # -------------------------------------------------------------------------------
 # Environment
 # -------------------------------------------------------------------------------
-sim: "isaacsim"
+sim: "isaacgym"
 robots: ["franka"]
 task: "pick_place.approach_grasp_simple"
 decimation: 4
diff --git a/roboverse_learn/rl/fast_td3/configs/track.yaml b/roboverse_learn/rl/fast_td3/configs/track.yaml
index f7aacae09..b06adffb0 100644
--- a/roboverse_learn/rl/fast_td3/configs/track.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/track.yaml
@@ -5,7 +5,7 @@
 # -------------------------------------------------------------------------------
 # Environment
 # -------------------------------------------------------------------------------
-sim: "isaacsim"
+sim: "isaacgym"
 robots: ["franka"]
 task: "pick_place.track"
 decimation: 4
@@ -14,7 +14,7 @@ headless: True
 
 # State file path for track task (pkl file path for grasp states)
 # If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
-state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl"
+state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl"
 
 # -------------------------------------------------------------------------------
 # Seeds & Device
diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md
index 877acf81e..9851f0def 100644
--- a/roboverse_pack/tasks/pick_place/README.md
+++ b/roboverse_pack/tasks/pick_place/README.md
@@ -15,8 +15,7 @@ The task is split into two stages:
 Train the first stage to learn approach and grasp:
 
 ```bash
-cd roboverse_learn/rl/fast_td3
-python train.py --config pick_place.yaml
+python -m roboverse_learn.rl.fast_td3.train --config pick_place.yaml
 ```
 
 This will generate checkpoints in the output directory.
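To pick the checkpoint for the next stage, the newest `.pt` file can also be grabbed programmatically. A minimal sketch, assuming checkpoints land under `models/` with the `pick_place.approach_grasp_simple_*.pt` naming used later in this README (the layout is an assumption, not part of this patch):

```python
from pathlib import Path

# Newest Stage 1 checkpoint by modification time (directory layout is an assumption).
ckpts = sorted(
    Path("models").glob("pick_place.approach_grasp_simple_*.pt"),
    key=lambda p: p.stat().st_mtime,
)
print(ckpts[-1] if ckpts else "no checkpoints found yet")
```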
@@ -26,13 +25,15 @@ This will generate checkpoints in the output directory.
 Note the checkpoint path for the next step.
 
 Evaluate the trained model and collect stable grasp states and first-half trajectories:
 
 ```bash
-python evaluate_lift.py \
-    --checkpoint models/pick_place.approach_grasp_simple_1210000.pt \
+python -m roboverse_learn.rl.fast_td3.evaluate_lift \
+    --checkpoint models/pick_place.approach_grasp_simple_65000.pt \
     --target_count 100 \
     --state_dir eval_states \
     --traj_dir eval_trajs
 ```
 
+BUG: Success rate more than 100%
+
 This generates:
 - **States file**: `eval_states/pick_place.approach_grasp_simple_franka_lift_states_*.pkl` (stable grasp states)
 - **Trajectories**: First-half trajectories in `eval_trajs/`
@@ -42,7 +43,7 @@ This generates:
 Load the collected states as initial states for track training:
 
 ```bash
-python train.py --config track.yaml
+python -m roboverse_learn.rl.fast_td3.train --config track.yaml
 ```
 
 Make sure `track.yaml` has the correct `state_file_path` pointing to the states file from Stage 2:
@@ -51,12 +52,14 @@ Make sure `track.yaml` has the correct `state_file_path` pointing to the states
 state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl"
 ```
 
+BUG: state_file_path is hard-coded
+
 ### Stage 4: Evaluate Track
 
 Evaluate the track task to get second-half trajectories:
 
 ```bash
-python evaluate.py --checkpoint models/pick_place.track_*.pt
+python -m roboverse_learn.rl.fast_td3.evaluate --checkpoint models/pick_place.track_*.pt
 ```
 
 ### Stage 5: Merge Trajectories
diff --git a/roboverse_pack/tasks/pick_place/track.py b/roboverse_pack/tasks/pick_place/track.py
index 9bd7012d9..0fb2e33a8 100644
--- a/roboverse_pack/tasks/pick_place/track.py
+++ b/roboverse_pack/tasks/pick_place/track.py
@@ -230,7 +230,7 @@ class PickPlaceTrack(PickPlaceBase):
 
     def __init__(self, scenario, device=None):
         self.state_file_path = (
-            "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl"
+            "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl"
         )
         self._loaded_states = None
 

From 81db6d76fa1f332ad255cf06de6cb766bb7a8f6c Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Fri, 5 Dec 2025 21:48:58 -0800
Subject: [PATCH 02/37] Fix bug where success rate exceeded 100% in
 evaluate_lift.py; fix hard-coded state path in track.py

---
 .gitignore                                   |  3 ++-
 roboverse_learn/rl/fast_td3/evaluate_lift.py | 17 ++++++++++++++++-
 roboverse_pack/tasks/pick_place/README.md    |  4 ----
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index faaacc0b8..c35f58d01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,4 +106,5 @@ id_rsa*
 .pytest_cache/
 
 *.pkl
-*.pt
\ No newline at end of file
+*.pt
+output
\ No newline at end of file
diff --git a/roboverse_learn/rl/fast_td3/evaluate_lift.py b/roboverse_learn/rl/fast_td3/evaluate_lift.py
index a7e7e5ae8..f83180311 100644
--- a/roboverse_learn/rl/fast_td3/evaluate_lift.py
+++ b/roboverse_learn/rl/fast_td3/evaluate_lift.py
@@ -183,6 +183,11 @@ def 
evaluate_lift_collection( collected_trajs.append(traj_data_serializable) collected_states.append(state_data_serializable) + # Mark episode as successful (count at most once per episode) + if not success_in_episode[i]: + success_in_episode[i] = True + successful_episodes_count += 1 + log.info( f"[Env {i}] Collected trajectory {len(collected_trajs)} " f"(lift maintained {lift_frame_count[i]} frames, total steps: {len(current_episode_actions[i])})" @@ -300,6 +310,8 @@ def evaluate_lift_collection( current_episode_init_state[i] = None current_returns[i] = 0 current_lengths[i] = 0 + # reset per-episode success flag for next episode + success_in_episode[i] = False done_masks = torch.logical_or(done_masks, dones) @@ -315,6 +327,8 @@ def evaluate_lift_collection( current_episode_actions[i] = [] current_episode_states[i] = [] current_episode_init_state[i] = extract_state_dict(env, scenario, env_idx=i) + # reset per-episode success flag after full reset + success_in_episode[i] = False else: obs = next_obs @@ -347,7 +361,8 @@ def evaluate_lift_collection( "collected_count": len(collected_trajs), "target_count": target_count, "episodes_completed": episodes_completed, - "success_rate": len(collected_trajs) / episodes_completed if episodes_completed > 0 else 0.0, + # success_rate = successful simulations / total simulations (episodes) + "success_rate": successful_episodes_count / episodes_completed if episodes_completed > 0 else 0.0, } return stats diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md index 9851f0def..9c0457e25 100644 --- a/roboverse_pack/tasks/pick_place/README.md +++ b/roboverse_pack/tasks/pick_place/README.md @@ -32,8 +32,6 @@ python -m roboverse_learn.rl.fast_td3.evaluate_lift \ --traj_dir eval_trajs ``` -BUG: Success rate more than 100% - This generates: - **States file**: `eval_states/pick_place.approach_grasp_simple_franka_lift_states_*.pkl` (stable grasp states) - **Trajectories**: First-half trajectories in `eval_trajs/` @@ -52,8 +50,6 @@ Make sure `track.yaml` has the correct `state_file_path` pointing to the states state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl" ``` -BUG: state_file_path is hard-coded - ### Stage 4: Evaluate Track Evaluate the track task to get second-half trajectories: From 17a74b332f29d5e8b88db66c324fd04303073226 Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Fri, 5 Dec 2025 22:07:29 -0800 Subject: [PATCH 03/37] add contributor --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index fc6f553bb..528f39029 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -47,6 +47,7 @@ Guidelines for modifications: * Yutong Liang * Yuyang Li * Zhigen Zhao +* Hanchu Zhou ## Acknowledgements From c0699a8fb178bec55ecb8b5ecb294597149ca1d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:08:37 +0000 Subject: [PATCH 04/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c35f58d01..8ccc3c121 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,4 @@ id_rsa* *.pkl *.pt -output \ No newline at end of file +output From e0ceed67ebd62f34910e87dc909d464a94aedf3d Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Wed, 10 Dec 2025 16:10:41 -0800 Subject: 
[PATCH 05/37] Fix success rate bug; allow evaluation with fewer environments than training

---
 roboverse_learn/rl/fast_td3/evaluate.py      | 83 +++++++++++++++++++-
 roboverse_learn/rl/fast_td3/evaluate_lift.py | 17 +++-
 2 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/roboverse_learn/rl/fast_td3/evaluate.py b/roboverse_learn/rl/fast_td3/evaluate.py
index 288595042..0a2d984a4 100644
--- a/roboverse_learn/rl/fast_td3/evaluate.py
+++ b/roboverse_learn/rl/fast_td3/evaluate.py
@@ -421,6 +421,66 @@ def evaluate(
     return stats
 
 
+def _adjust_state_dict_for_model(checkpoint_state: dict, model: torch.nn.Module):
+    """Adjust tensors from checkpoint_state to better match model.state_dict() shapes.
+
+    - If a tensor differs only in the leading (0-th) dimension and the remaining dims match,
+      we slice or repeat rows to match the model shape.
+    - If total numel matches, we reshape.
+    - Otherwise we fall back to the model's own parameter to avoid shape errors.
+    """
+    model_state = model.state_dict()
+    new_state = {}
+    for k, v in checkpoint_state.items():
+        if k not in model_state:
+            # keep unknown keys (they may be used elsewhere)
+            new_state[k] = v
+            continue
+
+        mv = model_state[k]
+        # Only handle tensors; keep other items as-is
+        if not isinstance(v, torch.Tensor) or not isinstance(mv, torch.Tensor):
+            new_state[k] = v
+            continue
+
+        if v.shape == mv.shape:
+            new_state[k] = v
+            continue
+
+        # Case: same trailing dims, mismatched leading dim (common when num_envs differs)
+        if v.ndim == mv.ndim and v.shape[1:] == mv.shape[1:]:
+            desired = mv.shape[0]
+            src = v
+            if src.shape[0] >= desired:
+                new_state[k] = src[:desired].to(mv.device).clone()
+            else:
+                # Repeat rows to reach desired size then slice
+                reps = (desired + src.shape[0] - 1) // src.shape[0]
+                tiled = src.repeat(reps, *([1] * (src.ndim - 1)))
+                new_state[k] = tiled[:desired].to(mv.device).clone()
+            log.info(f"Adjusted checkpoint param '{k}' from {v.shape} -> {new_state[k].shape}")
+            continue
+
+        # If total elements match, reshape
+        if v.numel() == mv.numel():
+            try:
+                new_state[k] = v.reshape(mv.shape).to(mv.device).clone()
+                log.info(f"Reshaped checkpoint param '{k}' from {v.shape} -> {mv.shape}")
+                continue
+            except Exception:
+                pass
+
+        # Last resort: try broadcasting/expanding
+        try:
+            new_state[k] = v.to(mv.device).expand_as(mv).clone()
+            log.info(f"Expanded checkpoint param '{k}' from {v.shape} -> {mv.shape}")
+            continue
+        except Exception:
+            log.warning(f"Could not match shape for param '{k}' ({v.shape} -> {mv.shape}); keeping model init")
+            new_state[k] = mv
+    return new_state
+
+
 def main():
     parser = argparse.ArgumentParser(description='FastTD3 Evaluation')
     parser.add_argument('--checkpoint', type=str, default='roboverse_data/models/walk_1400.pt',
@@ -524,9 +584,26 @@ def main():
     obs_normalizer = EmpiricalNormalization(shape=n_obs, device=device)
 
     # Load weights
-    actor.load_state_dict(checkpoint["actor_state_dict"])
-    if checkpoint.get("obs_normalizer_state"):
-        obs_normalizer.load_state_dict(checkpoint["obs_normalizer_state"])
+    # Safely adjust checkpoint actor state to match current model shapes (handles num_envs mismatch)
+    ck_actor_state = checkpoint.get("actor_state_dict", {})
+    if ck_actor_state:
+        try:
+            adjusted = _adjust_state_dict_for_model(ck_actor_state, actor)
+            # load non-strictly to allow missing/extra keys
+            actor.load_state_dict(adjusted, strict=False)
+        except Exception as e:
+            log.exception("Failed to load actor_state_dict with adjustment; falling back to non-strict load of the raw state: %s", e)
+            actor.load_state_dict(ck_actor_state, strict=False)
+    else:
+        log.warning("No actor_state_dict present in checkpoint")
+
+    # Load obs normalizer safely
+    try:
+        obs_state = checkpoint.get("obs_normalizer_state")
+        if obs_state:
+            obs_normalizer.load_state_dict(obs_state)
+    except Exception:
+        log.warning("Failed to load obs_normalizer_state from checkpoint; skipping")
 
     # Setup AMP
     amp_enabled = config.get("amp", False) and torch.cuda.is_available()
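As a sanity check, the leading-dimension rule in `_adjust_state_dict_for_model` above can be exercised in isolation; a minimal sketch with illustrative shapes (a parameter saved from a 400-env run loaded into a 100-env model, and vice versa):

```python
import torch

# Oversized leading dim: slice down to the model's row count.
saved = torch.randn(400, 7)
assert saved[:100].shape == (100, 7)

# Undersized leading dim: tile rows up, then truncate.
small, desired = torch.randn(100, 7), 400
reps = (desired + small.shape[0] - 1) // small.shape[0]  # ceiling division
tiled = small.repeat(reps, 1)[:desired]
assert tiled.shape == (400, 7)
```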
load: %s", e) + actor.load_state_dict(ck_actor_state, strict=False) + else: + log.warning("No actor_state_dict present in checkpoint") + + # Load obs normalizer safely + try: + obs_state = checkpoint.get("obs_normalizer_state") + if obs_state: + obs_normalizer.load_state_dict(obs_state) + except Exception: + log.warning("Failed to load obs_normalizer_state from checkpoint; skipping") # Setup AMP amp_enabled = config.get("amp", False) and torch.cuda.is_available() diff --git a/roboverse_learn/rl/fast_td3/evaluate_lift.py b/roboverse_learn/rl/fast_td3/evaluate_lift.py index f83180311..ba613cb31 100644 --- a/roboverse_learn/rl/fast_td3/evaluate_lift.py +++ b/roboverse_learn/rl/fast_td3/evaluate_lift.py @@ -332,6 +332,15 @@ def evaluate_lift_collection( else: obs = next_obs + # Treat each active env as an attempted episode, and count it as successful if it already + active_envs = (~done_masks).sum().item() if 'done_masks' in locals() else 0 + successes_in_active = sum( + 1 for i in range(num_eval_envs) + if 'done_masks' in locals() and not done_masks[i] and success_in_episode.get(i, False) + ) + attempted_episodes = episodes_completed + active_envs + total_successful_episodes = successful_episodes_count + successes_in_active + if len(collected_trajs) > 0: os.makedirs(traj_dir, exist_ok=True) os.makedirs(state_dir, exist_ok=True) @@ -357,12 +366,14 @@ def evaluate_lift_collection( log.info(f" - State count: {len(collected_states)}") else: log.warning("No successful trajectories collected") + # Success rate: fraction of attempted episodes (completed + in-progress when we stopped) + denom = max(attempted_episodes, 1) + success_rate = min(total_successful_episodes, denom) / denom stats = { "collected_count": len(collected_trajs), "target_count": target_count, - "episodes_completed": episodes_completed, - # success_rate = successful simulations / total simulations (episodes) - "success_rate": successful_episodes_count / episodes_completed if episodes_completed > 0 else 0.0, + "episodes_completed": attempted_episodes, + "success_rate": success_rate, } return stats From 066596d0a9f9c32f4c9d49827bcfeb10a9a25713 Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Wed, 10 Dec 2025 17:12:58 -0800 Subject: [PATCH 06/37] Make training script individual; Add gripper downward reward --- ...pick_place.yaml => pick_place_banana.yaml} | 2 +- .../configs/pick_place_screwdriver.yaml | 87 ++++ .../configs/{track.yaml => track_banana.yaml} | 6 +- .../fast_td3/configs/track_screwdriver.yaml | 93 ++++ .../rl/fast_td3/replay_lift_states.py | 2 +- .../tasks/pick_place/approach_grasp_banana.py | 480 ++++++++++++++++++ ...grasp.py => approach_grasp_screwdriver.py} | 142 ++++-- .../pick_place/{track.py => track_banana.py} | 99 +++- .../tasks/pick_place/track_screwdriver.py | 447 ++++++++++++++++ 9 files changed, 1300 insertions(+), 58 deletions(-) rename roboverse_learn/rl/fast_td3/configs/{pick_place.yaml => pick_place_banana.yaml} (98%) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml rename roboverse_learn/rl/fast_td3/configs/{track.yaml => track_banana.yaml} (94%) create mode 100644 roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana.py rename roboverse_pack/tasks/pick_place/{approach_grasp.py => approach_grasp_screwdriver.py} (70%) rename roboverse_pack/tasks/pick_place/{track.py => track_banana.py} (75%) create mode 100644 roboverse_pack/tasks/pick_place/track_screwdriver.py diff --git 
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml
similarity index 98%
rename from roboverse_learn/rl/fast_td3/configs/pick_place.yaml
rename to roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml
index 065d7cd8c..43d6f5562 100644
--- a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml
@@ -6,7 +6,7 @@
 # -------------------------------------------------------------------------------
 sim: "isaacgym"
 robots: ["franka"]
-task: "pick_place.approach_grasp_simple"
+task: "pick_place.approach_grasp_banana"
 decimation: 4
 train_or_eval: "train"
 headless: True
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml
new file mode 100644
index 000000000..3bb351508
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml
@@ -0,0 +1,87 @@
+# Configuration for FastTD3 Training - Approach & Grasp (Screwdriver)
+# Franka pick-and-place in IsaacGym; trains the Stage 1 approach-and-grasp policy
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_screwdriver"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# 
------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3" +use_wandb: false +checkpoint_path: null +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml similarity index 94% rename from roboverse_learn/rl/fast_td3/configs/track.yaml rename to roboverse_learn/rl/fast_td3/configs/track_banana.yaml index b06adffb0..4fe16b044 100644 --- a/roboverse_learn/rl/fast_td3/configs/track.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml @@ -7,14 +7,14 @@ # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.track" +task: "pick_place.track_banana" decimation: 4 train_or_eval: "train" -headless: True +headless: False # State file path for track task (pkl file path for grasp states) # If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE -state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl" +state_file_path: "eval_states/pick_place.approach_grasp_banana_franka_lift_states_100states_20251210_153518.pkl" # ------------------------------------------------------------------------------- # Seeds & Device diff --git a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml new file mode 100644 index 000000000..ea9360e3f --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml @@ -0,0 +1,93 @@ +# Configuration for FastTD3 Training - Track Task +# Stage 3: Track task - for training trajectory tracking +# Starts from saved grasp states, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_screwdriver" +decimation: 4 +train_or_eval: "train" +headless: False + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_screwdriver_franka_lift_states_100states_20251210_154907.pkl" + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 + +# ------------------------------------------------------------------------------- +# Optimizer & 
Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track" +exp_name: "pick_place_track" +use_wandb: false +checkpoint_path: null +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/replay_lift_states.py b/roboverse_learn/rl/fast_td3/replay_lift_states.py index aad63f46d..6a77106df 100644 --- a/roboverse_learn/rl/fast_td3/replay_lift_states.py +++ b/roboverse_learn/rl/fast_td3/replay_lift_states.py @@ -36,7 +36,7 @@ from metasim.scenario.cameras import PinholeCameraCfg from metasim.task.registry import get_task_class -from roboverse_pack.tasks.pick_place.track import convert_state_dict_to_initial_state +from roboverse_pack.tasks.pick_place.track_banana import convert_state_dict_to_initial_state def load_states_from_pkl(pkl_path: str): diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py new file mode 100644 index 000000000..a8e6ed6c4 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -0,0 +1,480 @@ +"""Stage 1: Simple Approach and Grasp task with gripper control. + +This task focuses on learning to approach the object, grasp it with gripper, and lift it. +Simple gripper control: close when near the object. +""" + +from __future__ import annotations + +from copy import deepcopy + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +@register_task("pick_place.approach_grasp_banana", "pick_place_approach_grasp_banana") +class PickPlaceApproachGraspBanana(PickPlaceBase): + """Simple Approach and Grasp task with gripper control. + + This task focuses on: + - Approaching the object + - Grasping the object with simple gripper control (close when near) + + Success condition: Object is grasped (reward given when entering grasp state). + Episode terminates if object is released. 
+ """ + + GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing + GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp + + # Joint2 lift parameters (for franka: panda_joint2) + JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) + JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control + JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step + + DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) + DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ + "gripper_approach": 0.5, + "grasp_reward": 4.0, + }) + DEFAULT_CONFIG_SIMPLE["grasp_config"] = { + "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, + "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, + } + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="screw_driver", + scale=(1.5, 1.5, 1.5), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + 
urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Placeholders needed during super().__init__ (reset may be called there) + self.object_grasped = None + self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 + self._grasp_notified = None + self._distance_history = None # History buffer for stable grasp check + self.joint2_name = "panda_joint2" + self.joint2_index = None + self.initial_joint_pos = None + + super().__init__(scenario, device) + + # Override reward functions for this task + self.reward_functions = [ + self._reward_gripper_approach, + self._reward_grasp, + self._reward_gripper_downward_alignment, + ] + self.reward_weights = [ + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + 0.1, # weight for keeping the gripper pointing downward + ] + + # Get config values + grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] + self.grasp_check_distance = grasp_config["grasp_check_distance"] + self.gripper_close_distance = grasp_config["gripper_close_distance"] + + # Initialize tracking buffers + self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + 
self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + + # Find joint2 index + joint_names = self.handler.get_joint_names(self.robot_name, sort=True) + if self.joint2_name in joint_names: + self.joint2_index = joint_names.index(self.joint2_name) + else: + log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") + + def reset(self, env_ids=None): + """Reset environment and tracking variables.""" + obs, info = super().reset(env_ids=env_ids) + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + + # Reset grasp tracking + self.object_grasped[env_ids_tensor] = False + if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + else: + self._grasp_notified[env_ids_tensor] = False + self._distance_history[env_ids_tensor] = float("inf") + + # Store initial joint positions if not already stored + if self.initial_joint_pos is None: + states = self.handler.get_states(mode="tensor") + self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() + + return obs, info + + def step(self, actions): + """Step with delta control and simple gripper control.""" + current_states = self.handler.get_states(mode="tensor") + box_pos = current_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(current_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near object + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + # Call RLTaskEnv.step directly + # Note: reward functions will be called inside super().step() + # and they will compute newly_grasped by comparing current state with self.object_grasped + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step (for next step's comparison) + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_grasped] = True + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! 
Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_released] = False + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _apply_simple_gripper_control(self, actions, gripper_box_dist): + """Simple gripper control: close when near object.""" + # Close gripper when close to object + gripper_close = gripper_box_dist < self.gripper_close_distance + gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed + gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open + + # Set gripper joints + for gripper_idx in self.gripper_joint_indices: + actions[:, gripper_idx] = torch.where( + gripper_close, + gripper_value_close, + gripper_value_open, + ) + + return actions + + def _apply_joint2_lift_control(self, actions, current_states): + """Apply joint2 lift control when object is grasped.""" + if self.initial_joint_pos is None: + self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() + + joint_pos = current_states.robots[self.robot_name].joint_pos + joint2_idx = self.joint2_index + + # Target position: initial position + lift offset (positive offset lifts up) + target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET + joint_error = target_lift - joint_pos[:, joint2_idx] + + # Apply proportional control with max delta limit + desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error + delta = torch.clamp( + desired - joint_pos[:, joint2_idx], + -self.JOINT2_LIFT_MAX_DELTA, + self.JOINT2_LIFT_MAX_DELTA, + ) + joint2_value = torch.clamp( + joint_pos[:, joint2_idx] + delta, + self._action_low[joint2_idx], + self._action_high[joint2_idx], + ) + + # Apply lift control only to environments where object is grasped + actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] + + return actions + + def _compute_grasp_state(self, states): + """Compute if object is grasped (requires 5 stable frames based on distance only).""" + box_pos = states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def 
_reward_gripper_approach(self, env_states) -> torch.Tensor: + """Reward for gripper approaching the box.""" + box_pos = env_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(env_states) + gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) + + approach_reward_far = 1 - torch.tanh(gripper_box_dist) + approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) + return approach_reward_far + approach_reward_near + + def _reward_grasp(self, env_states) -> torch.Tensor: + """Reward for maintaining grasp state (continuous reward while grasped).""" + # Use cached grasp state (computed in step method) + return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments.""" + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.220000, 0.200000, 0.955000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "screw_driver": { + "pos": torch.tensor([0.530000, -0.410000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.280000, 0.825000]), + "rot": torch.tensor([0.889292, -0.000000, 0.000001, -0.457338]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.260000, -0.240000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.270000, -0.190000, 0.970000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.260000, -0.140000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.250000, -0.030000, 1.240000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.310000, 0.190000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.475731, -0.000000, -0.000001, 0.879590]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + 
"panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py similarity index 70% rename from roboverse_pack/tasks/pick_place/approach_grasp.py rename to roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index c2500797d..d72a14d50 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -12,14 +12,15 @@ from loguru import logger as log from metasim.constants import PhysicStateType -from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase -@register_task("pick_place.approach_grasp_simple", "pick_place_approach_grasp_simple") -class PickPlaceApproachGraspSimple(PickPlaceBase): +@register_task("pick_place.approach_grasp_screwdriver", "pick_place_approach_screwdriver") +class PickPlaceApproachGraspScrewDriver(PickPlaceBase): """Simple Approach and Grasp task with gripper control. This task focuses on: @@ -50,20 +51,61 @@ class PickPlaceApproachGraspSimple(PickPlaceBase): scenario = ScenarioCfg( objects=[ - PrimitiveCubeCfg( + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + 
mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( name="object", - size=(0.04, 0.04, 0.04), - mass=0.02, + scale=(1.5, 1.5, 1.5), physics=PhysicStateType.RIGIDBODY, - color=(1.0, 0.0, 0.0), + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", ), - PrimitiveCubeCfg( - name="table", - size=(0.2, 0.3, 0.4), - mass=10.0, + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - color=(0.8, 0.6, 0.4), - fix_base_link=True, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", ), # Visualization: Trajectory waypoints (5 spheres showing trajectory path) RigidObjCfg( @@ -146,10 +188,12 @@ def __init__(self, scenario, device=None): self.reward_functions = [ self._reward_gripper_approach, self._reward_grasp, + self._reward_gripper_downward_alignment, ] self.reward_weights = [ self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + 0.1, # weight for keeping the gripper pointing downward ] # Get config values @@ -346,54 +390,84 @@ def _reward_grasp(self, env_states) -> torch.Tensor: """Reward for maintaining grasp state (continuous reward while grasped).""" # Use cached grasp state (computed in step method) return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments.""" init = [ { "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.280000, 0.130000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.640000, -0.320000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, "object": { - "pos": torch.tensor([0.654277, -0.345737, 0.020000]), - "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + "pos": 
torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), }, - "table": { - "pos": torch.tensor([0.499529, 0.253941, 0.200000]), - "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), }, - # Trajectory waypoints (world coordinates) "traj_marker_0": { - "pos": torch.tensor([0.610000, -0.280000, 0.150000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.350000, -0.310000, 0.870000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), }, "traj_marker_1": { - "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "pos": torch.tensor([0.330000, -0.290000, 0.980000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_2": { - "pos": torch.tensor([0.560000, -0.110000, 0.360000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + "pos": torch.tensor([0.340000, -0.200000, 1.090000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_3": { - "pos": torch.tensor([0.530000, 0.010000, 0.470000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.330000, -0.040000, 1.180000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), }, "traj_marker_4": { - "pos": torch.tensor([0.510000, 0.130000, 0.460000]), - "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + "pos": torch.tensor([0.310000, 0.150000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, }, "robots": { "franka": { - "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), "dof_pos": { - "panda_finger_joint1": 0.04, - "panda_finger_joint2": 0.04, - "panda_joint1": 0.0, + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, "panda_joint2": -0.785398, - "panda_joint3": 0.0, + "panda_joint3": 0.000000, "panda_joint4": -2.356194, - "panda_joint5": 0.0, + "panda_joint5": 0.000000, "panda_joint6": 1.570796, "panda_joint7": 0.785398, }, diff --git a/roboverse_pack/tasks/pick_place/track.py b/roboverse_pack/tasks/pick_place/track_banana.py similarity index 75% rename from roboverse_pack/tasks/pick_place/track.py rename to roboverse_pack/tasks/pick_place/track_banana.py index 0fb2e33a8..4f74fc447 100644 --- a/roboverse_pack/tasks/pick_place/track.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -9,13 +9,15 @@ import os import pickle from copy import deepcopy +from pathlib import Path import numpy as np import torch +import yaml from loguru import logger as log from metasim.constants import PhysicStateType -from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase @@ -129,7 +131,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, "tracking_progress": 150.0, "rotation_tracking": 2.0, }) -# 移除不需要的奖励 +# Remove unused rewards DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) # Disable 
randomization for exact state reproduction @@ -138,8 +140,8 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 -@register_task("pick_place.track", "pick_place_track") -class PickPlaceTrack(PickPlaceBase): +@register_task("pick_place.track_banana", "pick_place_track_banana") +class PickPlaceTrackBanana(PickPlaceBase): """Trajectory tracking task from grasp states. Assumes object is already grasped, only learns trajectory following. @@ -148,22 +150,63 @@ class PickPlaceTrack(PickPlaceBase): scenario = ScenarioCfg( objects=[ - PrimitiveCubeCfg( + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( name="object", - size=(0.04, 0.04, 0.04), - mass=0.02, + scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - color=(1.0, 0.0, 0.0), + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", ), - PrimitiveCubeCfg( - name="table", - size=(0.2, 0.3, 0.4), - mass=10.0, + RigidObjCfg( + name="screw_driver", + scale=(1.5, 1.5, 1.5), physics=PhysicStateType.RIGIDBODY, - color=(0.8, 0.6, 0.4), - fix_base_link=True, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + 
urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", ), - # Trajectory waypoint markers + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) RigidObjCfg( name="traj_marker_0", urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", @@ -229,9 +272,23 @@ class PickPlaceTrack(PickPlaceBase): max_episode_steps = 200 def __init__(self, scenario, device=None): - self.state_file_path = ( - "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl" - ) + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] self._loaded_states = None if device is None: @@ -354,9 +411,13 @@ def reset(self, env_ids=None): def step(self, actions): """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions + new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) real_actions[:, 0] = gripper_value_closed diff --git a/roboverse_pack/tasks/pick_place/track_screwdriver.py b/roboverse_pack/tasks/pick_place/track_screwdriver.py new file mode 100644 index 000000000..de46689c9 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_screwdriver.py @@ -0,0 +1,447 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. 
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 2.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_screwdriver", "pick_place_track_screwdriver") +class PickPlaceTrackScrewDriver(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="object", + scale=(1.5, 1.5, 1.5), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + 
scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_screwdriver.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 1.0, # rotation_tracking weight is already applied inside the function + ] + self.grasp_check_distance = 0.25 + + def 
_prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = 
self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        newly_released = ~is_grasping
+
+        if newly_released.any() and newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info

From c13ef641e76478d944411a9af895b109bb5f90e0 Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Tue, 2 Dec 2025 17:22:11 -0800
Subject: [PATCH 07/37] Update pick_place configs and tasks

---
 roboverse_pack/tasks/pick_place/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md
index 9c0457e25..9851f0def 100644
--- a/roboverse_pack/tasks/pick_place/README.md
+++ b/roboverse_pack/tasks/pick_place/README.md
@@ -32,6 +32,8 @@ python -m roboverse_learn.rl.fast_td3.evaluate_lift \
   --traj_dir eval_trajs
 ```
 
+BUG: Success rate more than 100%
+
 This generates:
 - **States file**: `eval_states/pick_place.approach_grasp_simple_franka_lift_states_*.pkl` (stable grasp states)
 - **Trajectories**: First-half trajectories in `eval_trajs/`
@@ -50,6 +52,8 @@ Make sure `track.yaml` has the correct `state_file_path` pointing to the states
 state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl"
 ```
 
+BUG: state_file_path is hard-coded
+
 ### Stage 4: Evaluate Track
 
 Evaluate the track task to get second-half trajectories:

From 3196e1839367c800f034ac2a7f38cf34df8b91e5 Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Fri, 5 Dec 2025 21:48:58 -0800
Subject: [PATCH 08/37] Fix bug that success rate is more than 100% in evaluate_lift.py; Fixed hard-coded path in track.py

---
 roboverse_pack/tasks/pick_place/README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md
index 9851f0def..9c0457e25 100644
---
a/roboverse_pack/tasks/pick_place/README.md
+++ b/roboverse_pack/tasks/pick_place/README.md
@@ -32,8 +32,6 @@ python -m roboverse_learn.rl.fast_td3.evaluate_lift \
   --traj_dir eval_trajs
 ```
 
-BUG: Success rate more than 100%
-
 This generates:
 - **States file**: `eval_states/pick_place.approach_grasp_simple_franka_lift_states_*.pkl` (stable grasp states)
 - **Trajectories**: First-half trajectories in `eval_trajs/`
@@ -52,8 +50,6 @@ Make sure `track.yaml` has the correct `state_file_path` pointing to the states
 state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_101states_20251122_180651.pkl"
 ```
 
-BUG: state_file_path is hard-coded
-
 ### Stage 4: Evaluate Track
 
 Evaluate the track task to get second-half trajectories:

From d67aab3e105b4e12223791071917bc18bc059f76 Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Wed, 10 Dec 2025 16:10:41 -0800
Subject: [PATCH 09/37] Fix success rate bug; enable more flexible evaluation so the user can run evaluation with fewer environments

---
 roboverse_learn/rl/fast_td3/evaluate.py      | 83 +++++++++++++++++++-
 roboverse_learn/rl/fast_td3/evaluate_lift.py | 17 +++-
 2 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/roboverse_learn/rl/fast_td3/evaluate.py b/roboverse_learn/rl/fast_td3/evaluate.py
index 288595042..0a2d984a4 100644
--- a/roboverse_learn/rl/fast_td3/evaluate.py
+++ b/roboverse_learn/rl/fast_td3/evaluate.py
@@ -421,6 +421,69 @@ def evaluate(
     return stats
 
 
+def _adjust_state_dict_for_model(checkpoint_state: dict, model: torch.nn.Module):
+    """Adjust tensors from checkpoint_state to better match model.state_dict() shapes.
+
+    - If a tensor differs only in the leading (0-th) dimension and the remaining dims match,
+      we slice or repeat rows to match the model shape.
+    - If total numel matches, we reshape.
+    - Otherwise we fall back to the model's own parameter to avoid shape errors.
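+    - Example (hypothetical shapes): a checkpoint tensor of shape (1024, 256) loaded into
+      a model slot of shape (400, 256) is sliced to its first 400 rows, while a (100, 256)
+      tensor is tiled four times and then truncated to 400 rows.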
+ """ + model_state = model.state_dict() + new_state = {} + for k, v in checkpoint_state.items(): + if k not in model_state: + # keep unknown keys (they may be used elsewhere) + new_state[k] = v + continue + + mv = model_state[k] + # Only handle tensors; keep other items as-is + if not isinstance(v, torch.Tensor) or not isinstance(mv, torch.Tensor): + new_state[k] = v + continue + + if v.shape == mv.shape: + new_state[k] = v + continue + + # Case: same trailing dims, mismatched leading dim (common when num_envs differs) + if v.ndim == mv.ndim and v.shape[1:] == mv.shape[1:]: + desired = mv.shape[0] + src = v + if src.shape[0] >= desired: + new_state[k] = src[:desired].to(mv.device).clone() + else: + # Repeat rows to reach desired size then slice + reps = (desired + src.shape[0] - 1) // src.shape[0] + tiled = src.repeat(reps, *([1] * (src.ndim - 1))) + new_state[k] = tiled[:desired].to(mv.device).clone() + log.info(f"Adjusted checkpoint param '{k}' from {v.shape} -> {new_state[k].shape}") + continue + + # If total elements match, reshape + if v.numel() == mv.numel(): + try: + new_state[k] = v.reshape(mv.shape).to(mv.device).clone() + log.info(f"Reshaped checkpoint param '{k}' from {v.shape} -> {mv.shape}") + continue + except Exception: + pass + + # Last resort: try broadcasting/expanding + try: + new_state[k] = v.to(mv.device).expand_as(mv).clone() + log.info(f"Expanded checkpoint param '{k}' from {v.shape} -> {mv.shape}") + continue + except Exception: + log.warning(f"Could not match shape for param '{k}' ({v.shape} -> {mv.shape}); keeping model init") + new_state[k] = mv + return new_state + + def main(): parser = argparse.ArgumentParser(description='FastTD3 Evaluation') parser.add_argument('--checkpoint', type=str, default='roboverse_data/models/walk_1400.pt', @@ -524,9 +584,26 @@ def main(): obs_normalizer = EmpiricalNormalization(shape=n_obs, device=device) # Load weights - actor.load_state_dict(checkpoint["actor_state_dict"]) - if checkpoint.get("obs_normalizer_state"): - obs_normalizer.load_state_dict(checkpoint["obs_normalizer_state"]) + # Safely adjust checkpoint actor state to match current model shapes (handles num_envs mismatch) + ck_actor_state = checkpoint.get("actor_state_dict", {}) + if ck_actor_state: + try: + adjusted = _adjust_state_dict_for_model(ck_actor_state, actor) + # load non-strictly to allow missing/extra keys + actor.load_state_dict(adjusted, strict=False) + except Exception as e: + log.exception("Failed to load actor_state_dict with adjustment, falling back to strict load: %s", e) + actor.load_state_dict(ck_actor_state, strict=False) + else: + log.warning("No actor_state_dict present in checkpoint") + + # Load obs normalizer safely + try: + obs_state = checkpoint.get("obs_normalizer_state") + if obs_state: + obs_normalizer.load_state_dict(obs_state) + except Exception: + log.warning("Failed to load obs_normalizer_state from checkpoint; skipping") # Setup AMP amp_enabled = config.get("amp", False) and torch.cuda.is_available() diff --git a/roboverse_learn/rl/fast_td3/evaluate_lift.py b/roboverse_learn/rl/fast_td3/evaluate_lift.py index f83180311..ba613cb31 100644 --- a/roboverse_learn/rl/fast_td3/evaluate_lift.py +++ b/roboverse_learn/rl/fast_td3/evaluate_lift.py @@ -332,6 +332,15 @@ def evaluate_lift_collection( else: obs = next_obs + # Treat each active env as an attempted episode, and count it as successful if it already + active_envs = (~done_masks).sum().item() if 'done_masks' in locals() else 0 + successes_in_active = sum( + 1 for i in 
range(num_eval_envs) + if 'done_masks' in locals() and not done_masks[i] and success_in_episode.get(i, False) + ) + attempted_episodes = episodes_completed + active_envs + total_successful_episodes = successful_episodes_count + successes_in_active + if len(collected_trajs) > 0: os.makedirs(traj_dir, exist_ok=True) os.makedirs(state_dir, exist_ok=True) @@ -357,12 +366,14 @@ def evaluate_lift_collection( log.info(f" - State count: {len(collected_states)}") else: log.warning("No successful trajectories collected") + # Success rate: fraction of attempted episodes (completed + in-progress when we stopped) + denom = max(attempted_episodes, 1) + success_rate = min(total_successful_episodes, denom) / denom stats = { "collected_count": len(collected_trajs), "target_count": target_count, - "episodes_completed": episodes_completed, - # success_rate = successful simulations / total simulations (episodes) - "success_rate": successful_episodes_count / episodes_completed if episodes_completed > 0 else 0.0, + "episodes_completed": attempted_episodes, + "success_rate": success_rate, } return stats From 3ad659f654a6611005f096fc8737414c1eb26b85 Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Wed, 10 Dec 2025 17:12:58 -0800 Subject: [PATCH 10/37] Make training script individual; Add gripper downward reward --- ...pick_place.yaml => pick_place_banana.yaml} | 2 +- .../configs/pick_place_screwdriver.yaml | 87 ++++ .../configs/{track.yaml => track_banana.yaml} | 6 +- .../fast_td3/configs/track_screwdriver.yaml | 93 ++++ .../rl/fast_td3/replay_lift_states.py | 2 +- .../tasks/pick_place/approach_grasp_banana.py | 480 ++++++++++++++++++ ...grasp.py => approach_grasp_screwdriver.py} | 142 ++++-- .../pick_place/{track.py => track_banana.py} | 99 +++- .../tasks/pick_place/track_screwdriver.py | 447 ++++++++++++++++ 9 files changed, 1300 insertions(+), 58 deletions(-) rename roboverse_learn/rl/fast_td3/configs/{pick_place.yaml => pick_place_banana.yaml} (98%) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml rename roboverse_learn/rl/fast_td3/configs/{track.yaml => track_banana.yaml} (94%) create mode 100644 roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana.py rename roboverse_pack/tasks/pick_place/{approach_grasp.py => approach_grasp_screwdriver.py} (70%) rename roboverse_pack/tasks/pick_place/{track.py => track_banana.py} (75%) create mode 100644 roboverse_pack/tasks/pick_place/track_screwdriver.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml similarity index 98% rename from roboverse_learn/rl/fast_td3/configs/pick_place.yaml rename to roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml index 065d7cd8c..43d6f5562 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml @@ -6,7 +6,7 @@ # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.approach_grasp_simple" +task: "pick_place.approach_grasp_banana" decimation: 4 train_or_eval: "train" headless: True diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml new file mode 100644 index 000000000..3bb351508 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml @@ -0,0 +1,87 @@ +# Base 
Configuration for FastTD3 Training
+# Default configuration with the IsaacGym simulator and the Franka arm
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_screwdriver"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3"
+use_wandb: false
+checkpoint_path: null
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml
similarity index 94%
rename from roboverse_learn/rl/fast_td3/configs/track.yaml
rename to roboverse_learn/rl/fast_td3/configs/track_banana.yaml
index f7aacae09..4fe16b044 100644
--- a/roboverse_learn/rl/fast_td3/configs/track.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml
@@ -7,7 +7,7 @@
 # -------------------------------------------------------------------------------
 sim: "isaacgym"
 robots: ["franka"]
-task: "pick_place.track"
+task: "pick_place.track_banana"
 decimation: 4
 train_or_eval: "train"
-headless: True
+headless: False
 
 # State file path for track task (pkl file path for grasp states)
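+# NOTE: PickPlaceTrackBanana.__init__ also reads this file directly (resolved relative
+# to the RoboVerse root) to pick up state_file_path when the task is constructed.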
# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE -state_file_path: "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl" +state_file_path: "eval_states/pick_place.approach_grasp_banana_franka_lift_states_100states_20251210_153518.pkl" # ------------------------------------------------------------------------------- # Seeds & Device diff --git a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml new file mode 100644 index 000000000..ea9360e3f --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml @@ -0,0 +1,93 @@ +# Configuration for FastTD3 Training - Track Task +# Stage 3: Track task - for training trajectory tracking +# Starts from saved grasp states, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_screwdriver" +decimation: 4 +train_or_eval: "train" +headless: False + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_screwdriver_franka_lift_states_100states_20251210_154907.pkl" + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 + +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# 
------------------------------------------------------------------------------- +wandb_project: "pick_place_track" +exp_name: "pick_place_track" +use_wandb: false +checkpoint_path: null +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/replay_lift_states.py b/roboverse_learn/rl/fast_td3/replay_lift_states.py index aad63f46d..6a77106df 100644 --- a/roboverse_learn/rl/fast_td3/replay_lift_states.py +++ b/roboverse_learn/rl/fast_td3/replay_lift_states.py @@ -36,7 +36,7 @@ from metasim.scenario.cameras import PinholeCameraCfg from metasim.task.registry import get_task_class -from roboverse_pack.tasks.pick_place.track import convert_state_dict_to_initial_state +from roboverse_pack.tasks.pick_place.track_banana import convert_state_dict_to_initial_state def load_states_from_pkl(pkl_path: str): diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py new file mode 100644 index 000000000..a8e6ed6c4 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -0,0 +1,480 @@ +"""Stage 1: Simple Approach and Grasp task with gripper control. + +This task focuses on learning to approach the object, grasp it with gripper, and lift it. +Simple gripper control: close when near the object. +""" + +from __future__ import annotations + +from copy import deepcopy + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +@register_task("pick_place.approach_grasp_banana", "pick_place_approach_grasp_banana") +class PickPlaceApproachGraspBanana(PickPlaceBase): + """Simple Approach and Grasp task with gripper control. + + This task focuses on: + - Approaching the object + - Grasping the object with simple gripper control (close when near) + + Success condition: Object is grasped (reward given when entering grasp state). + Episode terminates if object is released. 
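+
+Grasp detection is purely distance based: the gripper must stay within
+GRASP_DISTANCE_THRESHOLD of the object for GRASP_HISTORY_WINDOW consecutive frames;
+once grasped, panda_joint2 is servoed upward by JOINT2_LIFT_OFFSET to lift the object.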
+ """ + + GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing + GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp + + # Joint2 lift parameters (for franka: panda_joint2) + JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) + JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control + JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step + + DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) + DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ + "gripper_approach": 0.5, + "grasp_reward": 4.0, + }) + DEFAULT_CONFIG_SIMPLE["grasp_config"] = { + "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, + "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, + } + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="screw_driver", + scale=(1.5, 1.5, 1.5), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + 
urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Placeholders needed during super().__init__ (reset may be called there) + self.object_grasped = None + self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 + self._grasp_notified = None + self._distance_history = None # History buffer for stable grasp check + self.joint2_name = "panda_joint2" + self.joint2_index = None + self.initial_joint_pos = None + + super().__init__(scenario, device) + + # Override reward functions for this task + self.reward_functions = [ + self._reward_gripper_approach, + self._reward_grasp, + self._reward_gripper_downward_alignment, + ] + self.reward_weights = [ + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + 0.1, # weight for keeping the gripper pointing downward + ] + + # Get config values + grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] + self.grasp_check_distance = grasp_config["grasp_check_distance"] + self.gripper_close_distance = grasp_config["gripper_close_distance"] + + # Initialize tracking buffers + self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + 
self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + + # Find joint2 index + joint_names = self.handler.get_joint_names(self.robot_name, sort=True) + if self.joint2_name in joint_names: + self.joint2_index = joint_names.index(self.joint2_name) + else: + log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") + + def reset(self, env_ids=None): + """Reset environment and tracking variables.""" + obs, info = super().reset(env_ids=env_ids) + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + + # Reset grasp tracking + self.object_grasped[env_ids_tensor] = False + if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + else: + self._grasp_notified[env_ids_tensor] = False + self._distance_history[env_ids_tensor] = float("inf") + + # Store initial joint positions if not already stored + if self.initial_joint_pos is None: + states = self.handler.get_states(mode="tensor") + self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() + + return obs, info + + def step(self, actions): + """Step with delta control and simple gripper control.""" + current_states = self.handler.get_states(mode="tensor") + box_pos = current_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(current_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near object + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + # Call RLTaskEnv.step directly + # Note: reward functions will be called inside super().step() + # and they will compute newly_grasped by comparing current state with self.object_grasped + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step (for next step's comparison) + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_grasped] = True + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! 
Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_released] = False + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _apply_simple_gripper_control(self, actions, gripper_box_dist): + """Simple gripper control: close when near object.""" + # Close gripper when close to object + gripper_close = gripper_box_dist < self.gripper_close_distance + gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed + gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open + + # Set gripper joints + for gripper_idx in self.gripper_joint_indices: + actions[:, gripper_idx] = torch.where( + gripper_close, + gripper_value_close, + gripper_value_open, + ) + + return actions + + def _apply_joint2_lift_control(self, actions, current_states): + """Apply joint2 lift control when object is grasped.""" + if self.initial_joint_pos is None: + self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() + + joint_pos = current_states.robots[self.robot_name].joint_pos + joint2_idx = self.joint2_index + + # Target position: initial position + lift offset (positive offset lifts up) + target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET + joint_error = target_lift - joint_pos[:, joint2_idx] + + # Apply proportional control with max delta limit + desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error + delta = torch.clamp( + desired - joint_pos[:, joint2_idx], + -self.JOINT2_LIFT_MAX_DELTA, + self.JOINT2_LIFT_MAX_DELTA, + ) + joint2_value = torch.clamp( + joint_pos[:, joint2_idx] + delta, + self._action_low[joint2_idx], + self._action_high[joint2_idx], + ) + + # Apply lift control only to environments where object is grasped + actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] + + return actions + + def _compute_grasp_state(self, states): + """Compute if object is grasped (requires 5 stable frames based on distance only).""" + box_pos = states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def 
_reward_gripper_approach(self, env_states) -> torch.Tensor: + """Reward for gripper approaching the box.""" + box_pos = env_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(env_states) + gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) + + approach_reward_far = 1 - torch.tanh(gripper_box_dist) + approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) + return approach_reward_far + approach_reward_near + + def _reward_grasp(self, env_states) -> torch.Tensor: + """Reward for maintaining grasp state (continuous reward while grasped).""" + # Use cached grasp state (computed in step method) + return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments.""" + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.220000, 0.200000, 0.955000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "screw_driver": { + "pos": torch.tensor([0.530000, -0.410000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.280000, 0.825000]), + "rot": torch.tensor([0.889292, -0.000000, 0.000001, -0.457338]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.260000, -0.240000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.270000, -0.190000, 0.970000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.260000, -0.140000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.250000, -0.030000, 1.240000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.310000, 0.190000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.475731, -0.000000, -0.000001, 0.879590]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + 
"panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py similarity index 70% rename from roboverse_pack/tasks/pick_place/approach_grasp.py rename to roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index c2500797d..d72a14d50 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -12,14 +12,15 @@ from loguru import logger as log from metasim.constants import PhysicStateType -from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase -@register_task("pick_place.approach_grasp_simple", "pick_place_approach_grasp_simple") -class PickPlaceApproachGraspSimple(PickPlaceBase): +@register_task("pick_place.approach_grasp_screwdriver", "pick_place_approach_screwdriver") +class PickPlaceApproachGraspScrewDriver(PickPlaceBase): """Simple Approach and Grasp task with gripper control. This task focuses on: @@ -50,20 +51,61 @@ class PickPlaceApproachGraspSimple(PickPlaceBase): scenario = ScenarioCfg( objects=[ - PrimitiveCubeCfg( + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + 
mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( name="object", - size=(0.04, 0.04, 0.04), - mass=0.02, + scale=(1.5, 1.5, 1.5), physics=PhysicStateType.RIGIDBODY, - color=(1.0, 0.0, 0.0), + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", ), - PrimitiveCubeCfg( - name="table", - size=(0.2, 0.3, 0.4), - mass=10.0, + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - color=(0.8, 0.6, 0.4), - fix_base_link=True, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", ), # Visualization: Trajectory waypoints (5 spheres showing trajectory path) RigidObjCfg( @@ -146,10 +188,12 @@ def __init__(self, scenario, device=None): self.reward_functions = [ self._reward_gripper_approach, self._reward_grasp, + self._reward_gripper_downward_alignment, ] self.reward_weights = [ self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + 0.1, # weight for keeping the gripper pointing downward ] # Get config values @@ -346,54 +390,84 @@ def _reward_grasp(self, env_states) -> torch.Tensor: """Reward for maintaining grasp state (continuous reward while grasped).""" # Use cached grasp state (computed in step method) return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments.""" init = [ { "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.280000, 0.130000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.640000, -0.320000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, "object": { - "pos": torch.tensor([0.654277, -0.345737, 0.020000]), - "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + "pos": 
torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), }, - "table": { - "pos": torch.tensor([0.499529, 0.253941, 0.200000]), - "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), }, - # Trajectory waypoints (world coordinates) "traj_marker_0": { - "pos": torch.tensor([0.610000, -0.280000, 0.150000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.350000, -0.310000, 0.870000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), }, "traj_marker_1": { - "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "pos": torch.tensor([0.330000, -0.290000, 0.980000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_2": { - "pos": torch.tensor([0.560000, -0.110000, 0.360000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + "pos": torch.tensor([0.340000, -0.200000, 1.090000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_3": { - "pos": torch.tensor([0.530000, 0.010000, 0.470000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.330000, -0.040000, 1.180000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), }, "traj_marker_4": { - "pos": torch.tensor([0.510000, 0.130000, 0.460000]), - "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + "pos": torch.tensor([0.310000, 0.150000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, }, "robots": { "franka": { - "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), "dof_pos": { - "panda_finger_joint1": 0.04, - "panda_finger_joint2": 0.04, - "panda_joint1": 0.0, + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, "panda_joint2": -0.785398, - "panda_joint3": 0.0, + "panda_joint3": 0.000000, "panda_joint4": -2.356194, - "panda_joint5": 0.0, + "panda_joint5": 0.000000, "panda_joint6": 1.570796, "panda_joint7": 0.785398, }, diff --git a/roboverse_pack/tasks/pick_place/track.py b/roboverse_pack/tasks/pick_place/track_banana.py similarity index 75% rename from roboverse_pack/tasks/pick_place/track.py rename to roboverse_pack/tasks/pick_place/track_banana.py index 0fb2e33a8..4f74fc447 100644 --- a/roboverse_pack/tasks/pick_place/track.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -9,13 +9,15 @@ import os import pickle from copy import deepcopy +from pathlib import Path import numpy as np import torch +import yaml from loguru import logger as log from metasim.constants import PhysicStateType -from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase @@ -129,7 +131,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, "tracking_progress": 150.0, "rotation_tracking": 2.0, }) -# 移除不需要的奖励 +# Remove unused rewards DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) # Disable 
randomization for exact state reproduction @@ -138,8 +140,8 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 -@register_task("pick_place.track", "pick_place_track") -class PickPlaceTrack(PickPlaceBase): +@register_task("pick_place.track_banana", "pick_place_track_banana") +class PickPlaceTrackBanana(PickPlaceBase): """Trajectory tracking task from grasp states. Assumes object is already grasped, only learns trajectory following. @@ -148,22 +150,63 @@ class PickPlaceTrack(PickPlaceBase): scenario = ScenarioCfg( objects=[ - PrimitiveCubeCfg( + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( name="object", - size=(0.04, 0.04, 0.04), - mass=0.02, + scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - color=(1.0, 0.0, 0.0), + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", ), - PrimitiveCubeCfg( - name="table", - size=(0.2, 0.3, 0.4), - mass=10.0, + RigidObjCfg( + name="screw_driver", + scale=(1.5, 1.5, 1.5), physics=PhysicStateType.RIGIDBODY, - color=(0.8, 0.6, 0.4), - fix_base_link=True, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + 
urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", ), - # Trajectory waypoint markers + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) RigidObjCfg( name="traj_marker_0", urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", @@ -229,9 +272,23 @@ class PickPlaceTrack(PickPlaceBase): max_episode_steps = 200 def __init__(self, scenario, device=None): - self.state_file_path = ( - "eval_states/pick_place.approach_grasp_simple_franka_lift_states_100states_20251126_170312.pkl" - ) + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] self._loaded_states = None if device is None: @@ -354,9 +411,13 @@ def reset(self, env_ids=None): def step(self, actions): """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions + new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) real_actions[:, 0] = gripper_value_closed diff --git a/roboverse_pack/tasks/pick_place/track_screwdriver.py b/roboverse_pack/tasks/pick_place/track_screwdriver.py new file mode 100644 index 000000000..de46689c9 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_screwdriver.py @@ -0,0 +1,447 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. 
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 2.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_screwdriver", "pick_place_track_screwdriver") +class PickPlaceTrackScrewDriver(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="object", + scale=(1.5, 1.5, 1.5), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + 
scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_screwdriver.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 1.0, # rotation_tracking weight is already applied inside the function + ] + self.grasp_check_distance = 0.25 + + def 
_prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = 
self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos + delta_actions = actions * self._action_scale + new_actions = current_joint_pos + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions + + gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) + real_actions[:, 0] = gripper_value_closed + real_actions[:, 1] = gripper_value_closed + + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + updated_states = self.handler.get_states(mode="tensor") + box_pos = updated_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(updated_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + is_grasping = gripper_box_dist < self.grasp_check_distance + + self.object_grasped = is_grasping + newly_released = ~is_grasping + + if newly_released.any() and newly_released[0]: + log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m") + + terminated = terminated | newly_released + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info From 0355e6007d76a7cea7e6dfc759dcba25f73d66bc Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Thu, 11 Dec 2025 17:01:42 -0800 Subject: [PATCH 11/37] Resolved merge conflicts by keeping local versions --- get_started/obj_layout/object_layout.py | 229 +++++-- .../rl/fast_td3/configs/pick_place_bowl.yaml | 90 +++ .../configs/pick_place_cuttingtool.yaml | 90 +++ .../rl/fast_td3/configs/pick_place_spoon.yaml | 90 +++ .../rl/fast_td3/configs/track_spoon.yaml | 98 +++ roboverse_learn/rl/fast_td3/train.py | 41 +- roboverse_pack/tasks/pick_place/README.md | 2 +- .../tasks/pick_place/approach_grasp_bowl.py | 382 +++++++++++ .../pick_place/approach_grasp_cuttingtool.py | 349 ++++++++++ .../tasks/pick_place/approach_grasp_spoon.py | 227 +++++++ roboverse_pack/tasks/pick_place/base.py | 30 +- .../tasks/pick_place/track_spoon.py | 614 ++++++++++++++++++ 12 files changed, 2180 insertions(+), 62 deletions(-) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/track_spoon.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_bowl.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_spoon.py create mode 100644 roboverse_pack/tasks/pick_place/track_spoon.py diff --git a/get_started/obj_layout/object_layout.py b/get_started/obj_layout/object_layout.py index d08f45148..21566b0a4 100644 --- a/get_started/obj_layout/object_layout.py +++ 
b/get_started/obj_layout/object_layout.py @@ -385,63 +385,161 @@ def __post_init__(self): usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", - fix_base_link=True, ), + # RigidObjCfg( + # name="lamp", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + # ), RigidObjCfg( - name="banana", + name="basket", scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", ), + # RigidObjCfg( + # name="bowl", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + # ), RigidObjCfg( - name="mug", + name="cutting_tools", scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", ), + # RigidObjCfg( + # name="screw_driver", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + # ), + # RigidObjCfg( + # name="spoon", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + # 
mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + # ), + # RigidObjCfg( + # name="banana", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + # ), + # RigidObjCfg( + # name="mug", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + # ), + # RigidObjCfg( + # name="book", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + # ), + # RigidObjCfg( + # name="lamp", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/usd/lamp.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/result/lamp.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/mjcf/lamp.xml", + # ), + # RigidObjCfg( + # name="remote_control", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + # ), + # RigidObjCfg( + # name="rubiks_cube", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/usd/rubik's_cube.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/result/rubik's_cube.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/mjcf/rubik's_cube.xml", + # ), + # RigidObjCfg( + # name="vase", + # scale=(1, 1, 1), + # physics=PhysicStateType.RIGIDBODY, + # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + # ), + # Trajectory markers RigidObjCfg( - name="book", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, ), RigidObjCfg( - name="lamp", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - 
usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/usd/lamp.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/result/lamp.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/lamp/mjcf/lamp.xml", + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, ), RigidObjCfg( - name="remote_control", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, ), RigidObjCfg( - name="rubiks_cube", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/usd/rubik's_cube.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/result/rubik's_cube.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/mjcf/rubik's_cube.xml", + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, ), RigidObjCfg( - name="vase", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, ), ], ) @@ -468,10 +566,6 @@ def __post_init__(self): "pos": torch.tensor([0.3, -0.28, 0.82]), # Book on table "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]), }, - "lamp": { - "pos": torch.tensor([0.68, 0.10, 1.05]), # Lamp on table - "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]), - }, "remote_control": { "pos": torch.tensor([0.68, -0.54, 0.811]), # Remote on table "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]), @@ -484,6 +578,51 @@ def __post_init__(self): "pos": torch.tensor([0.30, 0.05, 0.95]), # Vase on table "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]), }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.280000, 0.130000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 
0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.640000, -0.320000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "screw_driver": { + "pos": torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + # Trajectory markers - initial positions + "traj_marker_0": { + "pos": torch.tensor([0.380000, -0.500000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.390000, -0.420000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.350000, -0.320000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.330000, -0.160000, 1.100000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.310000, 0.150000, 1.130000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, }, "robots": { "franka": { @@ -657,8 +796,6 @@ def __post_init__(self): handler.set_states(states) - handler.simulate() - if step % 10 == 0: handler.refresh_render() step += 1 @@ -666,4 +803,4 @@ def __post_init__(self): keyboard_client.close() handler.close() - log.info(f"Done (steps: {step})") + log.info(f"Done (steps: {step})") \ No newline at end of file diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml new file mode 100644 index 000000000..fbea3645c --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml @@ -0,0 +1,90 @@ +# Base Configuration for FastTD3 Training - Bowl +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_bowl" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# 
------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_bowl" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_bowl" # Unique run name for bowl task +model_dir: "models/bowl" # Separate directory for bowl checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 + diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml new file mode 100644 index 000000000..1e4db8bf8 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml @@ -0,0 +1,90 @@ +# Base Configuration for FastTD3 Training - Cutting Tool +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_cuttingtool" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# 
-------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_cuttingtool"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_cuttingtool" # Unique run name for cutting_tools task
+model_dir: "models/cuttingtool" # Separate directory for cutting_tools checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
+
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml
new file mode 100644
index 000000000..49dd350b9
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml
@@ -0,0 +1,90 @@
+# Base Configuration for FastTD3 Training - Spoon
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_spoon"
+decimation: 4
+train_or_eval: "train"
+headless: False
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
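+# Note: policy_noise and noise_clip configure TD3-style target-policy smoothing
+# (smoothing noise is clipped to +/- noise_clip); std_min/std_max presumably bound
+# the exploration-noise std used when collecting rollouts.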
+std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_spoon" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_spoon" # Unique run name +model_dir: "models/spoon_without_Zaxis" # Separate directory for basket layout +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 + diff --git a/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml b/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml new file mode 100644 index 000000000..7348760a7 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml @@ -0,0 +1,98 @@ +# Configuration for FastTD3 Training - Track Task (Spoon) +# Stage 3: Track task for spoon object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_spoon, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_spoon" # Uses spoon-specific track task +decimation: 4 +train_or_eval: "train" +headless: False + +# State file path for track task (pkl file path for grasp states) +# Note: This is for reference/documentation. 
The actual path is set in track_spoon.py __init__ +# After running evaluate_lift.py for the spoon task, update the state_file_path in track_spoon.py +# to point to the generated states file (e.g., from eval_states/ directory) +state_file_path: "eval_states/pick_place.approach_grasp_simple_spoon_franka_lift_states_130states_20251208_172505.pkl" + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 + +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track_spoon" +exp_name: "pick_place_track_spoon" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.track_spoon" # Unique run name for spoon track task +model_dir: "models/track_spoon" # Separate directory for track spoon checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 + diff --git a/roboverse_learn/rl/fast_td3/train.py b/roboverse_learn/rl/fast_td3/train.py index dfb03aaab..347b3ca2e 100644 --- a/roboverse_learn/rl/fast_td3/train.py +++ b/roboverse_learn/rl/fast_td3/train.py @@ -58,11 +58,11 @@ def get_config(): torch.set_float32_matmul_precision("high") +import inspect import numpy as np import torch import torch.nn.functional as F import tqdm -import wandb from loguru import logger as log from tensordict import TensorDict from torch import optim @@ -137,11 +137,14 @@ def main() -> None: scaler = GradScaler(enabled=amp_enabled and amp_dtype == torch.float16) - if cfg("use_wandb") and cfg("train_or_eval") 
== "train": - wandb.init( - project=cfg("wandb_project", "fttd3_training"), - save_code=True, - ) + # Import wandb if enabled + if cfg("use_wandb"): + import wandb + if cfg("train_or_eval") == "train": + wandb.init( + project=cfg("wandb_project", "fttd3_training"), + save_code=True, + ) random.seed(cfg("seed")) np.random.seed(cfg("seed")) @@ -166,9 +169,23 @@ def main() -> None: scenario = task_cls.scenario.update( robots=cfg("robots"), simulator=cfg("sim"), num_envs=cfg("num_envs"), headless=cfg("headless"), cameras=[] ) - envs = task_cls(scenario, device=device) - from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper - envs = TaskViserWrapper(envs) + # Check if task class accepts state_file_path parameter (only track tasks do) + init_signature = inspect.signature(task_cls.__init__) + accepts_state_file_path = "state_file_path" in init_signature.parameters + + # Pass state_file_path from config if task accepts it (for track tasks) + state_file_path = cfg("state_file_path", None) + if accepts_state_file_path and state_file_path is not None: + envs = task_cls(scenario, device=device, state_file_path=state_file_path) + else: + envs = task_cls(scenario, device=device) + # Only use viser wrapper if not headless and viser is available + if not cfg("headless"): + try: + from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper + envs = TaskViserWrapper(envs) + except ImportError: + log.warning("Viser not available, skipping visualization wrapper") eval_envs = envs # ---------------- derive shapes ------------------------------------ @@ -299,7 +316,11 @@ def render_with_rollout() -> list: scenario_render = scenario.update( robots=robots, simulator=simulator, num_envs=num_envs, headless=headless, cameras=cameras ) - env = task_cls(scenario_render, device=device) + # Pass state_file_path from config if task accepts it (for track tasks) + if accepts_state_file_path and state_file_path is not None: + env = task_cls(scenario_render, device=device, state_file_path=state_file_path) + else: + env = task_cls(scenario_render, device=device) obs_normalizer.eval() obs, info = env.reset() diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md index 9c0457e25..fb36e4bdb 100644 --- a/roboverse_pack/tasks/pick_place/README.md +++ b/roboverse_pack/tasks/pick_place/README.md @@ -26,7 +26,7 @@ Evaluate the trained model and collect stable grasp states and first-half trajec ```bash python -m roboverse_learn.rl.fast_td3.evaluate_lift \ - --checkpoint models/pick_place.approach_grasp_simple_65000.pt \ + --checkpoint models/spoon_basket/pick_place.approach_grasp_simple_spoon_basket_1065000.pt \ --target_count 100 \ --state_dir eval_states \ --traj_dir eval_trajs diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py new file mode 100644 index 000000000..2767a4eec --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py @@ -0,0 +1,382 @@ +"""Stage 1: Simple Approach and Grasp task for bowl object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the bowl object +with specific mesh configurations and saved poses from object_layout.py. 
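+
+The task is registered below as "pick_place.approach_grasp_simple_bowl"; a fast_td3
+config can select it via its task field (a hypothetical snippet, mirroring the
+existing pick_place configs):
+
+    task: "pick_place.approach_grasp_simple_bowl"
+    robots: ["franka"]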
+""" + +from __future__ import annotations + +import os +import importlib.util +from copy import deepcopy + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat, quat_apply +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple +from roboverse_pack.tasks.pick_place.base import PickPlaceBase + + +@register_task("pick_place.approach_grasp_simple_bowl", "pick_place_approach_grasp_simple_bowl") +class PickPlaceApproachGraspSimpleBowl(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for bowl object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses bowl mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251208_bowl_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # Use actual bowl mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + # Use actual table mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + fix_base_link=True, + ), + # Basket for visualization (matches object_layout.py) + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + 
physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + super().__init__(scenario, device) + + # Grasp point offset for bowl: point to the rim/edge instead of center + # Bowl coordinate frame: (x, y) at center of bowl, z is height (vertical) + # The bowl's origin is at the center/bottom, but we want to grasp at the rim + # Based on mesh analysis: bowl radius varies with height + # - At z=0.1m: radius ~0.099m + # - At z=0.12m: radius ~0.107m + # - At z=0.125m: radius ~0.108m + # Offset format: [x, y, z] in object's local frame + # [0.099, 0.0, 0.1] = 9.9cm radius in x (matches edge at z=0.1m), 0 in y, 10cm upward in z + self.grasp_point_offset_local = torch.tensor([0.099, 0.0, 0.1]) # [x, y, z] in object frame + + # Note: gripper orientation reward is now inherited from base class (approach_grasp.py) + # No need to override reward functions here + + def _get_grasp_point(self, states): + """Get the grasp point for the bowl (rim/edge) instead of center. 
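+
+        Worked example (assuming an identity orientation): with box_quat = [1, 0, 0, 0]
+        (wxyz), quat_apply returns the offset unchanged, so the grasp point is simply
+        box_pos + [0.099, 0.0, 0.1], i.e. 9.9 cm out along local x and 10 cm up from the
+        bowl origin. Any rotation of the bowl rotates the rim offset before the addition.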
+ + Bowl coordinate frame convention: + - (x, y) plane is at the center of the bowl + - z-axis is the height of the bowl (vertical, pointing up) + - Origin is at the center/bottom of the bowl + + The offset [0.099, 0.0, 0.1] points to the rim edge: + - 0.099m in x direction (radius from center, matches actual edge radius at z=0.1m) + - 0.0m in y direction (grasping at a specific point on rim) + - 0.1m in z direction (upward, at 10cm height from bottom) + + Args: + states: Environment states + + Returns: + grasp_point: (B, 3) grasp point in world coordinates (rim of bowl) + """ + # Get object center position and rotation + box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of bowl + box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - bowl orientation + + # Move offset to correct device and expand to batch size + offset_local = self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) # (B, 3) + + # Transform grasp point offset from object local frame to world frame + # quat_apply rotates a vector by a quaternion + # This accounts for the bowl's rotation in the world + offset_world = quat_apply(box_quat, offset_local) # (B, 3) + + # Add offset to object center to get grasp point (rim position) + grasp_point = box_pos + offset_world # (B, 3) + + return grasp_point + + def step(self, actions): + """Step with delta control and simple gripper control, using bowl rim as grasp point.""" + current_states = self.handler.get_states(mode="tensor") + + # Use grasp point (bowl rim) instead of object center + grasp_point = self._get_grasp_point(current_states) # (B, 3) + gripper_pos, _ = self._get_ee_state(current_states) + + # Calculate 3D Euclidean distance between gripper and grasp point (rim) + gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) + + # Calculate z-axis distance (vertical distance) + delta = gripper_pos - grasp_point + dist_z = torch.abs(delta[:, 2]) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near grasp point (both 3D distance and z-axis condition) + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist, dist_z) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step (for next step's comparison) + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_grasped] = True + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! 
Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_released] = False + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _compute_grasp_state(self, states): + """Compute if object is grasped, using bowl rim as grasp point.""" + # Use grasp point (bowl rim) instead of object center + grasp_point = self._get_grasp_point(states) # (B, 3) + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def _reward_gripper_approach(self, env_states) -> torch.Tensor: + """Reward for gripper approaching the bowl rim.""" + # Use grasp point (bowl rim) instead of object center + grasp_point = self._get_grasp_point(env_states) # (B, 3) + gripper_pos, _ = self._get_ee_state(env_states) + gripper_box_dist = torch.norm(grasp_point - gripper_pos, dim=-1) + + approach_reward_far = 1 - torch.tanh(gripper_box_dist) + approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) + return approach_reward_far + approach_reward_near + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads bowl, table, basket, and trajectory markers + from saved_poses_20251208_bowl_basket.py. 
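+
+        The loader assumes the saved-poses file exposes a module-level `poses` dict
+        shaped roughly like the sketch below (keys follow the accesses in this method):
+
+            poses = {
+                "objects": {"bowl": {...}, "table": {...}, "basket": {...},
+                            "traj_marker_0": {...}, ..., "traj_marker_4": {...}},
+                "robots": {"franka": {...}},
+            }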
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251208_bowl_basket.py" + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses, mapping bowl to "object" + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Bowl as the object to pick + "object": saved_poses["objects"]["bowl"], + "table": saved_poses["objects"]["table"], + # Basket for visualization (if present in saved poses) + "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + else: + # Default poses (fallback) + init = [ + { + "objects": { + "object": { + "pos": torch.tensor([0.654277, -0.345737, 0.020000]), + "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + }, + "table": { + "pos": torch.tensor([0.499529, 0.253941, 0.200000]), + "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + }, + # Trajectory waypoints (world coordinates) + "traj_marker_0": { + "pos": torch.tensor([0.610000, -0.280000, 0.150000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.560000, -0.110000, 0.360000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.530000, 0.010000, 0.470000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.510000, 0.130000, 0.460000]), + "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.04, + "panda_finger_joint2": 0.04, + "panda_joint1": 0.0, + "panda_joint2": -0.785398, + "panda_joint3": 0.0, + "panda_joint4": -2.356194, + "panda_joint5": 0.0, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init + diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py new file mode 100644 index 000000000..20185c506 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py @@ -0,0 +1,349 @@ +"""Stage 1: Simple Approach and Grasp task for cutting_tools object. 
+ +This task inherits from PickPlaceApproachGraspSimple and customizes it for the cutting_tools object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import os +import importlib.util + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import quat_apply +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple +from roboverse_pack.tasks.pick_place.base import PickPlaceBase + + +@register_task("pick_place.approach_grasp_simple_cuttingtool", "pick_place_approach_grasp_simple_cuttingtool") +class PickPlaceApproachGraspSimpleCuttingTool(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for cutting_tools object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses cutting_tools mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251210_cuttingtool.py + """ + + scenario = ScenarioCfg( + objects=[ + # Use actual cutting_tools mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual table mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + fix_base_link=True, + ), + # Basket for visualization (matches object_layout.py) + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + 
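+            # The same marker asset in URDF/MJCF/USD form, so each simulator
+            # backend can load whichever format it supports.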
urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + super().__init__(scenario, device) + + # Grasp point offset for cuttingtool: point to the handle center instead of object center + # Cutting tools typically have handles at one end. The offset is in the object's local frame. + # Adjust these values based on your cuttingtool mesh: + # - Negative x typically points toward the handle end (assuming tool extends along x-axis) + # - You may need to adjust based on the actual mesh orientation + # Default: [-0.08, 0.0, 0.0] means 8cm along negative x-axis (toward handle) + # If your tool is oriented differently, adjust the offset accordingly + self.grasp_point_offset_local = torch.tensor([-0.08, 0.0, 0.0]) # [x, y, z] in object frame + + def _get_grasp_point(self, states): + """Get the grasp point for the cuttingtool (handle center) instead of object center. 
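+
+        Quick sanity check (a sketch; box_quat below is wxyz): with an identity
+        quaternion [1, 0, 0, 0] the grasp point is obj_pos + [-0.08, 0.0, 0.0],
+        i.e. 8 cm along local -x from the object origin, toward the assumed handle end.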
+ + Args: + states: Environment states + + Returns: + grasp_point: (B, 3) grasp point in world coordinates (handle center) + """ + # Get object center position and rotation + box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of object + box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - object orientation + + # Move offset to correct device and expand to batch size + offset_local = self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) # (B, 3) + + # Transform grasp point offset from object local frame to world frame + # quat_apply rotates a vector by a quaternion + # This accounts for the object's rotation in the world + offset_world = quat_apply(box_quat, offset_local) # (B, 3) + + # Add offset to object center to get grasp point (handle position) + grasp_point = box_pos + offset_world # (B, 3) + + return grasp_point + + def step(self, actions): + """Step with delta control and simple gripper control, using handle center as grasp point.""" + current_states = self.handler.get_states(mode="tensor") + + # Use grasp point (handle center) instead of object center + grasp_point = self._get_grasp_point(current_states) # (B, 3) + gripper_pos, _ = self._get_ee_state(current_states) + + # Calculate 3D Euclidean distance between gripper and grasp point (handle) + gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near grasp point (ONLY 3D distance, NO z-axis condition) + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist, dist_z=None) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! 
Distance: {gripper_box_dist[0].item():.4f}m") + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _compute_grasp_state(self, states): + """Compute if object is grasped, using handle center as grasp point.""" + # Use grasp point (handle center) instead of object center + grasp_point = self._get_grasp_point(states) # (B, 3) + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads cutting_tools, table, basket, and trajectory markers + from saved_poses_20251210_cuttingtool.py. 
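+
+        Note the key remapping: the saved module stores the tool pose under
+        poses["objects"]["cutting_tools"], which this method assigns to the
+        scenario object named "object".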
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251210_cuttingtool.py" + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Cutting_tools as the object to pick + "object": saved_poses["objects"]["cutting_tools"], + "table": saved_poses["objects"]["table"], + # Basket for visualization (if present in saved poses) + "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + else: + # Default poses (fallback) + init = [ + { + "objects": { + "object": { + "pos": torch.tensor([0.654277, -0.345737, 0.020000]), + "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + }, + "table": { + "pos": torch.tensor([0.499529, 0.253941, 0.200000]), + "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + }, + # Trajectory waypoints (world coordinates) + "traj_marker_0": { + "pos": torch.tensor([0.610000, -0.280000, 0.150000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.560000, -0.110000, 0.360000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.530000, 0.010000, 0.470000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.510000, 0.130000, 0.460000]), + "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.04, + "panda_finger_joint2": 0.04, + "panda_joint1": 0.0, + "panda_joint2": -0.785398, + "panda_joint3": 0.0, + "panda_joint4": -2.356194, + "panda_joint5": 0.0, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init + diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py new file mode 100644 index 000000000..39bc93fc0 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py @@ -0,0 +1,227 @@ +"""Stage 1: Simple Approach and Grasp task for spoon object. 
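+
+Unlike the bowl and cutting-tool variants, this class defines no grasp-point offset:
+it keeps the parent's object-center grasp logic and only swaps the scenario assets
+and initial poses.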
+ +This task inherits from PickPlaceApproachGraspSimple and customizes it for the spoon object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import os +import importlib.util + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task("pick_place.approach_grasp_simple_spoon", "pick_place_approach_grasp_simple_spoon") +class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for spoon object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses spoon mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_spoon_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # Use actual spoon mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Use actual table mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + fix_base_link=True, + ), + # Basket for visualization (matches object_layout.py) + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + 
usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads spoon, table, basket, and trajectory markers + from saved_poses_20251206_spoon_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251206_spoon_basket.py" + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Spoon as the object to pick + "object": saved_poses["objects"]["spoon"], + "table": saved_poses["objects"]["table"], + # Basket for visualization (if present in saved poses) + "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + else: + # Default poses (fallback) + init = [ + { + "objects": { + "object": { + "pos": torch.tensor([0.654277, -0.345737, 0.020000]), + "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + }, + "table": { + "pos": torch.tensor([0.499529, 0.253941, 0.200000]), + "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + }, + # Trajectory waypoints (world coordinates) + "traj_marker_0": { + "pos": torch.tensor([0.610000, -0.280000, 0.150000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + 
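+                        # "rot" values here are wxyz quaternions (w first), the same
+                        # convention used for root_state elsewhere in this pack.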
"pos": torch.tensor([0.560000, -0.110000, 0.360000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.530000, 0.010000, 0.470000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.510000, 0.130000, 0.460000]), + "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.04, + "panda_finger_joint2": 0.04, + "panda_joint1": 0.0, + "panda_joint2": -0.785398, + "panda_joint3": 0.0, + "panda_joint4": -2.356194, + "panda_joint5": 0.0, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/base.py b/roboverse_pack/tasks/pick_place/base.py index 46e41be62..b80bed175 100644 --- a/roboverse_pack/tasks/pick_place/base.py +++ b/roboverse_pack/tasks/pick_place/base.py @@ -336,6 +336,18 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: target_pos = self.waypoint_positions[self.current_waypoint_idx] distance = torch.norm(ee_pos - target_pos, dim=-1) + # Progress reward: reward for reducing distance to waypoint + # This encourages the agent to make progress toward the current waypoint + # Only reward progress if we haven't reached this waypoint yet + not_already_reached = ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + distance_reduction = self.prev_distance_to_waypoint - distance + # Scale progress reward: 0.1 means progress reward is 10% of waypoint reached bonus + # This provides continuous guidance without overwhelming the sparse reward + progress_reward_component = torch.clamp( + distance_reduction * self.w_tracking_progress * 0.1, + min=0.0 + ) * not_already_reached.float() * grasped_mask.float() + # Distance-based reward (far + near) / 2 distance_reward_far = 1 - torch.tanh(1.0 * distance) distance_reward_near = 1 - torch.tanh(10.0 * distance) @@ -358,10 +370,13 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Both distance and rotation must be satisfied to consider as reached reached = distance_reached & rotation_reached - newly_reached = reached & ( - ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] - ) - progress_reward = newly_reached.float() * self.w_tracking_progress + newly_reached = reached & not_already_reached + # Waypoint reached bonus (one-time large reward when reaching a waypoint) + waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress + + # Update prev_distance for next step (only if not newly reached, + # because if newly reached, we'll update it when advancing to next waypoint) + self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] if newly_reached.any(): if newly_reached[0]: @@ -405,7 +420,12 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: (1 - torch.tanh(1.0 * distance_to_last[completed_mask])) * self.w_tracking_approach, ) - tracking_reward = torch.where(all_reached, maintain_reward, approach_reward + progress_reward) + # Combine all reward components: approach reward + progress reward + waypoint reached bonus + tracking_reward = torch.where( + all_reached, + maintain_reward, + approach_reward + progress_reward_component + 
waypoint_reached_bonus + ) return tracking_reward diff --git a/roboverse_pack/tasks/pick_place/track_spoon.py b/roboverse_pack/tasks/pick_place/track_spoon.py new file mode 100644 index 000000000..b195dd25e --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_spoon.py @@ -0,0 +1,614 @@ +"""Stage 3: Track task for trajectory tracking with spoon object. + +This task inherits from PickPlaceBase and implements track functionality +with spoon-specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import os +import pickle +import importlib.util +from copy import deepcopy + +import numpy as np +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) 
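+            # NOTE: np.ndarray is already matched by the (list, tuple, np.ndarray)
+            # checks above, so the torch.from_numpy fallbacks in these chains are
+            # unreachable; torch.tensor already handles the ndarray case.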
+            elif isinstance(rot, torch.Tensor):
+                rot = rot.to(device).float()
+            elif isinstance(rot, np.ndarray):
+                rot = torch.from_numpy(rot).to(device).float()
+
+            entity_entry = {
+                "pos": pos,
+                "rot": rot,
+            }
+
+            if "dof_pos" in entity_state:
+                entity_entry["dof_pos"] = entity_state["dof_pos"]
+
+            if name == robot_name:
+                initial_state["robots"][name] = entity_entry
+            else:
+                initial_state["objects"][name] = entity_entry
+
+    return initial_state
+
+
+DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+    "tracking_approach": 4.0,
+    "tracking_progress": 150.0,
+    "rotation_tracking": 2.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+# Increase reach threshold for spoon (more lenient for higher waypoints)
+DEFAULT_CONFIG_TRACK["trajectory_tracking"]["reach_threshold"] = 0.08  # Increased from 0.05 to 0.08 m
+
+
+@register_task("pick_place.track_spoon", "pick_place_track_spoon")
+class PickPlaceTrackSpoon(PickPlaceBase):
+    """Trajectory tracking task for spoon object.
+
+    This task mirrors PickPlaceTrack but inherits directly from PickPlaceBase and customizes:
+    - Scenario: Uses spoon mesh, table mesh, and basket from EmbodiedGenData
+    - Initial states: Loads poses from saved_poses_20251206_spoon_basket.py and forces gripper closed
+    """
+
+    scenario = ScenarioCfg(
+        objects=[
+            # Use actual spoon mesh from EmbodiedGenData (matches approach_grasp.py)
+            RigidObjCfg(
+                name="object",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml",
+            ),
+            # Use actual table mesh from EmbodiedGenData (matches approach_grasp.py)
+            RigidObjCfg(
+                name="table",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml",
+                fix_base_link=True,
+            ),
+            # Basket for visualization (matches approach_grasp.py)
+            RigidObjCfg(
+                name="basket",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml",
+            ),
+            # Trajectory waypoint markers
+            RigidObjCfg(
+                name="traj_marker_0",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+                fix_base_link=True,
+            ),
+            RigidObjCfg(
name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 300 # Increased to allow more time to reach all waypoints + + def __init__(self, scenario, device=None, state_file_path=None): + # Use state_file_path from config if provided, otherwise use default + if state_file_path is not None: + self.state_file_path = state_file_path + else: + # Default state file path for spoon task + self.state_file_path = ( + "eval_states/pick_place.approach_grasp_simple_franka_lift_states_154states_20251207_171537.pkl" + ) + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + # Override reach_threshold with more lenient value for spoon task + self.reach_threshold = DEFAULT_CONFIG_TRACK["trajectory_tracking"]["reach_threshold"] + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 1.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file with spoon-specific enhancements.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % 
len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Load trajectory marker positions from saved poses file (spoon-specific enhancement) + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251206_spoon_basket.py" + ) + + saved_traj_markers = None + saved_table = None + saved_basket = None + + if os.path.exists(saved_poses_path): + try: + import importlib.util + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + + # Extract trajectory markers + saved_traj_markers = {} + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name in saved_poses["objects"]: + saved_traj_markers[marker_name] = saved_poses["objects"][marker_name] + + # Extract table and basket if present + if "table" in saved_poses["objects"]: + saved_table = saved_poses["objects"]["table"] + if "basket" in saved_poses["objects"]: + saved_basket = saved_poses["objects"]["basket"] + + log.info(f"Loaded trajectory markers from {saved_poses_path}") + except Exception as e: + log.warning(f"Failed to load saved poses from {saved_poses_path}: {e}") + + # Default waypoint positions (fallback if saved poses not available) + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + # Force gripper to be fully closed for proper grasping + if "robots" in initial_state and robot_name in initial_state["robots"]: + robot_state = initial_state["robots"][robot_name] + if "dof_pos" in robot_state: + dof_pos = robot_state["dof_pos"] + # Set finger joints to 0.0 (fully closed) + if "panda_finger_joint1" in dof_pos: + dof_pos["panda_finger_joint1"] = 0.0 + if "panda_finger_joint2" in dof_pos: + dof_pos["panda_finger_joint2"] = 0.0 + log.debug(f"[Env {env_idx}] Forced gripper closed: finger1={dof_pos.get('panda_finger_joint1', 'N/A')}, finger2={dof_pos.get('panda_finger_joint2', 'N/A')}") + + # Update table position from saved poses if available + if saved_table is not None and "table" in initial_state["objects"]: + initial_state["objects"]["table"]["pos"] = saved_table["pos"].to(device) if isinstance(saved_table["pos"], torch.Tensor) else torch.tensor(saved_table["pos"], device=device) + initial_state["objects"]["table"]["rot"] = saved_table["rot"].to(device) if isinstance(saved_table["rot"], torch.Tensor) else torch.tensor(saved_table["rot"], device=device) + + # Add basket from saved poses if available + if saved_basket is not None: + basket_pos = 
saved_basket["pos"].to(device) if isinstance(saved_basket["pos"], torch.Tensor) else torch.tensor(saved_basket["pos"], device=device) + basket_rot = saved_basket["rot"].to(device) if isinstance(saved_basket["rot"], torch.Tensor) else torch.tensor(saved_basket["rot"], device=device) + initial_state["objects"]["basket"] = { + "pos": basket_pos, + "rot": basket_rot, + } + + # Use trajectory markers from saved poses if available, otherwise use defaults + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if saved_traj_markers is not None and marker_name in saved_traj_markers: + # Use saved marker position + marker_data = saved_traj_markers[marker_name] + marker_pos = marker_data["pos"].to(device) if isinstance(marker_data["pos"], torch.Tensor) else torch.tensor(marker_data["pos"], device=device) + marker_rot = marker_data["rot"].to(device) if isinstance(marker_data["rot"], torch.Tensor) else torch.tensor(marker_data["rot"], device=device) + initial_state["objects"][marker_name] = { + "pos": marker_pos, + "rot": marker_rot, + } + elif i < len(default_positions): + # Use default position + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + # Force gripper to be closed in _last_action (spoon-specific enhancement) + self._last_action[:, 0] = 0.0 # panda_finger_joint1 + self._last_action[:, 1] = 0.0 # panda_finger_joint2 + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + # Force gripper to be closed in _last_action for reset envs (spoon-specific enhancement) + self._last_action[env_ids, 0] = 0.0 # panda_finger_joint1 + self._last_action[env_ids, 1] = 0.0 # panda_finger_joint2 + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states().robots[self.robot_name].joint_pos + delta_actions = actions * 
self._action_scale
+        new_actions = current_joint_pos + delta_actions
+
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        # Enhanced grasp detection: check actual gripper joint positions
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+
+        gripper_joint_pos = updated_states.robots[self.robot_name].joint_pos[:, :2]
+        gripper_actually_closed = gripper_joint_pos.mean(dim=-1) < 0.02
+
+        is_grasping = (gripper_box_dist < self.grasp_check_distance) & gripper_actually_closed
+        self.object_grasped = is_grasping
+        newly_released = ~is_grasping
+
+        if newly_released.any() and newly_released[0]:
+            env_idx = 0
+            log.warning(
+                f"[Env {env_idx}] Object released during tracking! "
+                f"Distance: {gripper_box_dist[env_idx].item():.4f}m, "
+                f"Gripper joint pos: {gripper_joint_pos[env_idx].cpu().numpy()}, "
+                f"Gripper closed: {gripper_actually_closed[env_idx].item()}"
+            )
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
+
+    def _reward_trajectory_tracking(self, env_states) -> torch.Tensor:
+        """Override reward calculation with increased progress reward for later waypoints.
+
+        This addresses the issue where the agent gets stuck after waypoint 2 by:
+        1. Increasing the base progress reward multiplier (0.2 instead of 0.1) for better guidance
+        2. 
Adding adaptive scaling for later waypoints to encourage progress + """ + ee_pos, _ = self._get_ee_state(env_states) + grasped_mask = self.object_grasped + tracking_reward = torch.zeros(self.num_envs, device=self.device) + + if grasped_mask.any(): + target_pos = self.waypoint_positions[self.current_waypoint_idx] + distance = torch.norm(ee_pos - target_pos, dim=-1) + + # Progress reward with higher multiplier for better guidance + not_already_reached = ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + distance_reduction = self.prev_distance_to_waypoint - distance + + # Adaptive progress reward: higher for later waypoints (2, 3, 4) to encourage progress + # Waypoints 0-1: 0.2 (20%), Waypoints 2+: 0.4 (40%) to overcome larger distances + # Create multiplier tensor matching num_envs shape + progress_multiplier = torch.where( + self.current_waypoint_idx >= 2, + torch.full((self.num_envs,), 0.4, device=self.device), + torch.full((self.num_envs,), 0.2, device=self.device) + ) + + progress_reward_component = torch.clamp( + distance_reduction * self.w_tracking_progress * progress_multiplier, + min=0.0 + ) * not_already_reached.float() * grasped_mask.float() + + # Distance-based reward (far + near) / 2 + distance_reward_far = 1 - torch.tanh(1.0 * distance) + distance_reward_near = 1 - torch.tanh(10.0 * distance) + approach_reward = (distance_reward_far + distance_reward_near) / 2.0 + approach_reward = approach_reward * self.w_tracking_approach * grasped_mask.float() + + # Check distance condition + distance_reached = (distance < self.reach_threshold) & grasped_mask + + # Check rotation condition if rotation tracking is enabled + rotation_reached = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + rot_err = None + if self.enable_rotation_tracking: + from metasim.utils.math import matrix_from_quat + box_quat = env_states.objects["object"].root_state[:, 3:7] + box_mat = matrix_from_quat(box_quat).reshape(self.num_envs, 9) + target_quat = self.waypoint_rotations[self.current_waypoint_idx] + target_mat = matrix_from_quat(target_quat).reshape(self.num_envs, 9) + rot_err = torch.norm(target_mat[:, :6] - box_mat[:, :6], dim=-1) + rotation_reached = rot_err < self.rotation_error_threshold + + # Both distance and rotation must be satisfied to consider as reached + reached = distance_reached & rotation_reached + newly_reached = reached & not_already_reached + # Waypoint reached bonus (one-time large reward when reaching a waypoint) + waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress + + # Update prev_distance for next step + self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] + + if newly_reached.any(): + if newly_reached[0]: + wp_idx = self.current_waypoint_idx[0].item() + rot_info = "" + if self.enable_rotation_tracking and rot_err is not None: + rot_info = f", Rotation error: {rot_err[0].item():.4f} < {self.rotation_error_threshold}" + log.info( + f"[Env 0] Reached waypoint #{wp_idx}! 
Distance: {distance[0].item():.4f}m < {self.reach_threshold}m{rot_info}" + ) + + self.waypoints_reached[newly_reached, self.current_waypoint_idx[newly_reached]] = True + + # Advance to next waypoint if not at the last one + can_advance = newly_reached & (self.current_waypoint_idx < self.num_waypoints - 1) + + if can_advance.any() and can_advance[0]: + old_idx = self.current_waypoint_idx[0].item() + new_idx = old_idx + 1 + log.info(f" -> Advancing to waypoint #{new_idx}") + + self.current_waypoint_idx[can_advance] += 1 + + if can_advance.any(): + new_target_pos = self.waypoint_positions[self.current_waypoint_idx[can_advance]] + self.prev_distance_to_waypoint[can_advance] = torch.norm( + ee_pos[can_advance] - new_target_pos, dim=-1 + ) + + maintain_reward = torch.zeros(self.num_envs, device=self.device) + all_reached = self.waypoints_reached.all(dim=1) + completed_mask = all_reached & grasped_mask + + if completed_mask.any(): + last_target_pos = self.waypoint_positions[-1].unsqueeze(0).expand(self.num_envs, -1) + distance_to_last = torch.norm(ee_pos - last_target_pos, dim=-1) + + maintain_reward[completed_mask] = torch.where( + distance_to_last[completed_mask] < self.reach_threshold, + torch.full((completed_mask.sum(),), 5, device=self.device), + (1 - torch.tanh(1.0 * distance_to_last[completed_mask])) * self.w_tracking_approach, + ) + + # Combine all reward components + tracking_reward = torch.where( + all_reached, + maintain_reward, + approach_reward + progress_reward_component + waypoint_reached_bonus + ) + + return tracking_reward From 7e6fed2069f131fff2f6859dcda7668da18c8374 Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Thu, 11 Dec 2025 17:53:56 -0800 Subject: [PATCH 12/37] "Refactor approach_grasp tasks, yaml, and track tasks yaml" --- .../fast_td3/configs/pick_place_banana.yaml | 12 +- .../configs/pick_place_screwdriver.yaml | 12 +- .../rl/fast_td3/configs/track_banana.yaml | 18 +- .../fast_td3/configs/track_screwdriver.yaml | 18 +- .../tasks/pick_place/approach_grasp.py | 421 +++++++++++++++ .../tasks/pick_place/approach_grasp_banana.py | 486 ++++++------------ .../pick_place/approach_grasp_screwdriver.py | 460 +++++------------ 7 files changed, 730 insertions(+), 697 deletions(-) create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml index 43d6f5562..c4c2b9d7e 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml @@ -1,15 +1,15 @@ -# Base Configuration for FastTD3 Training -# Default configuration with IsaacGym simulator and H1 humanoid robot +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot # ------------------------------------------------------------------------------- # Environment # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.approach_grasp_banana" +task: "pick_place.approach_grasp_simple_banana" decimation: 4 train_or_eval: "train" -headless: True +headless: False # ------------------------------------------------------------------------------- # Seeds & Device @@ -78,9 +78,11 @@ measure_burnin: 3 # Logging & Checkpointing # ------------------------------------------------------------------------------- wandb_project: "get_started_fttd3" -exp_name: "get_started_fttd3" +exp_name: 
"get_started_fttd3_banana" use_wandb: false checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana" # Unique run name +model_dir: "models/banana" # Separate directory for banana checkpoints eval_interval: 5000 save_interval: 5000 video_width: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml index 3bb351508..5b776f62d 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_screwdriver.yaml @@ -1,15 +1,15 @@ -# Base Configuration for FastTD3 Training -# Default configuration with IsaacGym simulator and H1 humanoid robot +# Base Configuration for FastTD3 Training - Screwdriver +# Default configuration with IsaacGym simulator and Franka robot # ------------------------------------------------------------------------------- # Environment # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.approach_grasp_screwdriver" +task: "pick_place.approach_grasp_simple_screwdriver" decimation: 4 train_or_eval: "train" -headless: True +headless: False # ------------------------------------------------------------------------------- # Seeds & Device @@ -78,9 +78,11 @@ measure_burnin: 3 # Logging & Checkpointing # ------------------------------------------------------------------------------- wandb_project: "get_started_fttd3" -exp_name: "get_started_fttd3" +exp_name: "get_started_fttd3_screwdriver" use_wandb: false checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_screwdriver" # Unique run name +model_dir: "models/screwdriver" # Separate directory for screwdriver checkpoints eval_interval: 5000 save_interval: 5000 video_width: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml index 4fe16b044..7ff65a07f 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml @@ -1,19 +1,21 @@ -# Configuration for FastTD3 Training - Track Task -# Stage 3: Track task - for training trajectory tracking -# Starts from saved grasp states, only trains trajectory tracking +# Configuration for FastTD3 Training - Track Task (Banana) +# Stage 3: Track task for banana object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_banana, only trains trajectory tracking # ------------------------------------------------------------------------------- # Environment # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.track_banana" +task: "pick_place.track_banana" # Uses banana-specific track task decimation: 4 train_or_eval: "train" headless: False # State file path for track task (pkl file path for grasp states) -# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +# Note: This is for reference/documentation. 
The actual path is set in track_banana.py __init__ +# After running evaluate_lift.py for the banana task, update the state_file_path in track_banana.py +# to point to the generated states file (e.g., from eval_states/ directory) state_file_path: "eval_states/pick_place.approach_grasp_banana_franka_lift_states_100states_20251210_153518.pkl" # ------------------------------------------------------------------------------- @@ -83,10 +85,12 @@ measure_burnin: 3 # ------------------------------------------------------------------------------- # Logging & Checkpointing # ------------------------------------------------------------------------------- -wandb_project: "pick_place_track" -exp_name: "pick_place_track" +wandb_project: "pick_place_track_banana" +exp_name: "pick_place_track_banana" use_wandb: false checkpoint_path: null +run_name: "pick_place.track_banana" # Unique run name for banana track task +model_dir: "models/track_banana" # Separate directory for track banana checkpoints eval_interval: 5000 save_interval: 5000 video_width: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml index ea9360e3f..bf889a22d 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml @@ -1,19 +1,21 @@ -# Configuration for FastTD3 Training - Track Task -# Stage 3: Track task - for training trajectory tracking -# Starts from saved grasp states, only trains trajectory tracking +# Configuration for FastTD3 Training - Track Task (Screwdriver) +# Stage 3: Track task for screwdriver object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_screwdriver, only trains trajectory tracking # ------------------------------------------------------------------------------- # Environment # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.track_screwdriver" +task: "pick_place.track_screwdriver" # Uses screwdriver-specific track task decimation: 4 train_or_eval: "train" headless: False # State file path for track task (pkl file path for grasp states) -# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +# Note: This is for reference/documentation. 
The actual path is set in track_screwdriver.py __init__ +# After running evaluate_lift.py for the screwdriver task, update the state_file_path in track_screwdriver.py +# to point to the generated states file (e.g., from eval_states/ directory) state_file_path: "eval_states/pick_place.approach_grasp_screwdriver_franka_lift_states_100states_20251210_154907.pkl" # ------------------------------------------------------------------------------- @@ -83,10 +85,12 @@ measure_burnin: 3 # ------------------------------------------------------------------------------- # Logging & Checkpointing # ------------------------------------------------------------------------------- -wandb_project: "pick_place_track" -exp_name: "pick_place_track" +wandb_project: "pick_place_track_screwdriver" +exp_name: "pick_place_track_screwdriver" use_wandb: false checkpoint_path: null +run_name: "pick_place.track_screwdriver" # Unique run name for screwdriver track task +model_dir: "models/track_screwdriver" # Separate directory for track screwdriver checkpoints eval_interval: 5000 save_interval: 5000 video_width: 1024 diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py new file mode 100644 index 000000000..4a3b0ff02 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp.py @@ -0,0 +1,421 @@ +"""Stage 1: Simple Approach and Grasp task with gripper control. + +This task focuses on learning to approach the object, grasp it with gripper, and lift it. +Simple gripper control: close when near the object. +""" + +from __future__ import annotations + +from copy import deepcopy + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +@register_task("pick_place.approach_grasp_simple", "pick_place_approach_grasp_simple") +class PickPlaceApproachGraspSimple(PickPlaceBase): + """Simple Approach and Grasp task with gripper control. + + This task focuses on: + - Approaching the object + - Grasping the object with simple gripper control (close when near) + + Success condition: Object is grasped (reward given when entering grasp state). + Episode terminates if object is released. 
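+
+    Gripper control is heuristic rather than learned: the fingers close
+    automatically once the end effector is within GRASP_DISTANCE_THRESHOLD of
+    the object, a grasp counts as stable after GRASP_HISTORY_WINDOW consecutive
+    close frames, and panda_joint2 is then driven upward to lift the object.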
+ """ + + GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing + GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp + + # Joint2 lift parameters (for franka: panda_joint2) + JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) + JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control + JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step + + DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) + DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ + "gripper_approach": 0.5, + "grasp_reward": 4.0, + "gripper_downward_alignment": 0.1, + }) + DEFAULT_CONFIG_SIMPLE["grasp_config"] = { + "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, + "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, + } + + scenario = ScenarioCfg( + objects=[ + PrimitiveCubeCfg( + name="object", + size=(0.04, 0.04, 0.04), + mass=0.02, + physics=PhysicStateType.RIGIDBODY, + color=(1.0, 0.0, 0.0), + ), + PrimitiveCubeCfg( + name="table", + size=(0.2, 0.3, 0.4), + mass=10.0, + physics=PhysicStateType.RIGIDBODY, + color=(0.8, 0.6, 0.4), + fix_base_link=True, + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Placeholders needed during super().__init__ (reset may be called there) + self.object_grasped = None + self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 + self._grasp_notified = None + self._distance_history = None # History buffer for stable grasp check + self.joint2_name = "panda_joint2" + self.joint2_index = None + self.initial_joint_pos 
= None + + super().__init__(scenario, device) + + # Override reward functions for this task + self.reward_functions = [ + self._reward_gripper_approach, + self._reward_grasp, + self._reward_gripper_downward_alignment, + ] + self.reward_weights = [ + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_downward_alignment"], + ] + + # Get config values + grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] + self.grasp_check_distance = grasp_config["grasp_check_distance"] + self.gripper_close_distance = grasp_config["gripper_close_distance"] + + # Initialize tracking buffers + self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + + # Find joint2 index + joint_names = self.handler.get_joint_names(self.robot_name, sort=True) + if self.joint2_name in joint_names: + self.joint2_index = joint_names.index(self.joint2_name) + else: + log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") + + def reset(self, env_ids=None): + """Reset environment and tracking variables.""" + obs, info = super().reset(env_ids=env_ids) + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + + # Reset grasp tracking + self.object_grasped[env_ids_tensor] = False + if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + else: + self._grasp_notified[env_ids_tensor] = False + self._distance_history[env_ids_tensor] = float("inf") + + # Store initial joint positions if not already stored + if self.initial_joint_pos is None: + states = self.handler.get_states(mode="tensor") + self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() + + return obs, info + + def step(self, actions): + """Step with delta control and simple gripper control.""" + current_states = self.handler.get_states(mode="tensor") + box_pos = current_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(current_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near object + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + # Call RLTaskEnv.step directly + # Note: reward functions will be called inside super().step() + # and they will compute newly_grasped by comparing current state with self.object_grasped 
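+        # super(PickPlaceBase, self) starts the MRO lookup *after* PickPlaceBase,
+        # so PickPlaceBase.step is bypassed and the generic RL env step runs.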
+ obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step (for next step's comparison) + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_grasped] = True + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_released] = False + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _apply_simple_gripper_control(self, actions, gripper_box_dist): + """Simple gripper control: close when near object.""" + # Close gripper when close to object + gripper_close = gripper_box_dist < self.gripper_close_distance + gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed + gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open + + # Set gripper joints + for gripper_idx in self.gripper_joint_indices: + actions[:, gripper_idx] = torch.where( + gripper_close, + gripper_value_close, + gripper_value_open, + ) + + return actions + + def _apply_joint2_lift_control(self, actions, current_states): + """Apply joint2 lift control when object is grasped.""" + if self.initial_joint_pos is None: + self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() + + joint_pos = current_states.robots[self.robot_name].joint_pos + joint2_idx = self.joint2_index + + # Target position: initial position + lift offset (positive offset lifts up) + target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET + joint_error = target_lift - joint_pos[:, joint2_idx] + + # Apply proportional control with max delta limit + desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error + delta = torch.clamp( + desired - joint_pos[:, joint2_idx], + -self.JOINT2_LIFT_MAX_DELTA, + self.JOINT2_LIFT_MAX_DELTA, + ) + joint2_value = torch.clamp( + joint_pos[:, joint2_idx] + delta, + self._action_low[joint2_idx], + self._action_high[joint2_idx], + ) + + # Apply lift control only to environments where object is grasped + actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] + + return actions + + def _compute_grasp_state(self, states): + """Compute if object is grasped (requires 5 stable frames based on distance only).""" + box_pos = 
states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def _reward_gripper_approach(self, env_states) -> torch.Tensor: + """Reward for gripper approaching the box.""" + box_pos = env_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(env_states) + gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) + + approach_reward_far = 1 - torch.tanh(gripper_box_dist) + approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) + return approach_reward_far + approach_reward_near + + def _reward_grasp(self, env_states) -> torch.Tensor: + """Reward for maintaining grasp state (continuous reward while grasped).""" + # Use cached grasp state (computed in step method) + return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments.""" + init = [ + { + "objects": { + "object": { + "pos": torch.tensor([0.654277, -0.345737, 0.020000]), + "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + }, + "table": { + "pos": torch.tensor([0.499529, 0.253941, 0.200000]), + "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + }, + # Trajectory waypoints (world coordinates) + "traj_marker_0": { + "pos": torch.tensor([0.610000, -0.280000, 0.150000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.560000, -0.110000, 0.360000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.530000, 0.010000, 0.470000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.510000, 0.130000, 0.460000]), + "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.04, + "panda_finger_joint2": 0.04, + "panda_joint1": 0.0, + "panda_joint2": -0.785398, + "panda_joint3": 0.0, + 
"panda_joint4": -2.356194, + "panda_joint5": 0.0, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init \ No newline at end of file diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py index a8e6ed6c4..a66939722 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -1,12 +1,13 @@ -"""Stage 1: Simple Approach and Grasp task with gripper control. +"""Stage 1: Simple Approach and Grasp task for banana object. -This task focuses on learning to approach the object, grasp it with gripper, and lift it. -Simple gripper control: close when near the object. +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. """ from __future__ import annotations -from copy import deepcopy +import os +import importlib.util import torch from loguru import logger as log @@ -15,40 +16,18 @@ from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task -from metasim.utils.math import matrix_from_quat -from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_banana", "pick_place_approach_grasp_banana") -class PickPlaceApproachGraspBanana(PickPlaceBase): - """Simple Approach and Grasp task with gripper control. +@register_task("pick_place.approach_grasp_simple_banana", "pick_place_approach_grasp_simple_banana") +class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. - This task focuses on: - - Approaching the object - - Grasping the object with simple gripper control (close when near) - - Success condition: Object is grasped (reward given when entering grasp state). - Episode terminates if object is released. 
+ This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py """ - GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing - GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp - - # Joint2 lift parameters (for franka: panda_joint2) - JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) - JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control - JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step - - DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) - DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ - "gripper_approach": 0.5, - "grasp_reward": 4.0, - }) - DEFAULT_CONFIG_SIMPLE["grasp_config"] = { - "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, - "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, - } - scenario = ScenarioCfg( objects=[ RigidObjCfg( @@ -59,14 +38,6 @@ class PickPlaceApproachGraspBanana(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", ), - RigidObjCfg( - name="object", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", - ), RigidObjCfg( name="lamp", scale=(1, 1, 1), @@ -92,12 +63,21 @@ class PickPlaceApproachGraspBanana(PickPlaceBase): mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", ), RigidObjCfg( - name="screw_driver", - scale=(1.5, 1.5, 1.5), + name="cutting_tools", + scale=(1, 1, 1), physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual banana mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", ), RigidObjCfg( name="spoon", @@ -172,309 +152,129 @@ class PickPlaceApproachGraspBanana(PickPlaceBase): ) max_episode_steps = 200 - def __init__(self, scenario, device=None): - # Placeholders needed during super().__init__ (reset may be called there) - self.object_grasped = None - self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 - self._grasp_notified = None - 
self._distance_history = None # History buffer for stable grasp check - self.joint2_name = "panda_joint2" - self.joint2_index = None - self.initial_joint_pos = None - - super().__init__(scenario, device) - - # Override reward functions for this task - self.reward_functions = [ - self._reward_gripper_approach, - self._reward_grasp, - self._reward_gripper_downward_alignment, - ] - self.reward_weights = [ - self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], - self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], - 0.1, # weight for keeping the gripper pointing downward - ] - - # Get config values - grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] - self.grasp_check_distance = grasp_config["grasp_check_distance"] - self.gripper_close_distance = grasp_config["gripper_close_distance"] - - # Initialize tracking buffers - self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251206_banana_basket.py" ) - - # Find joint2 index - joint_names = self.handler.get_joint_names(self.robot_name, sort=True) - if self.joint2_name in joint_names: - self.joint2_index = joint_names.index(self.joint2_name) - else: - log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") - - def reset(self, env_ids=None): - """Reset environment and tracking variables.""" - obs, info = super().reset(env_ids=env_ids) - - if env_ids is None: - env_ids_tensor = torch.arange(self.num_envs, device=self.device) - else: - env_ids_tensor = ( - torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids - ) - - # Reset grasp tracking - self.object_grasped[env_ids_tensor] = False - if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: - self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses else: - self._grasp_notified[env_ids_tensor] = False - self._distance_history[env_ids_tensor] = float("inf") - - # Store initial joint positions if not already stored - if self.initial_joint_pos is None: - states = self.handler.get_states(mode="tensor") - self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() - - return obs, info - - def step(self, actions): - """Step with delta control and simple gripper control.""" - current_states = self.handler.get_states(mode="tensor") - box_pos = current_states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = 
self._get_ee_state(current_states) - gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) - - # Apply delta control - delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions - real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - - # Simple gripper control: close when near object - real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) - - # Apply joint2 lift control if grasped - if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: - real_actions = self._apply_joint2_lift_control(real_actions, current_states) - - # Bypass PickPlaceBase.step to avoid its gripper control logic - # Call RLTaskEnv.step directly - # Note: reward functions will be called inside super().step() - # and they will compute newly_grasped by comparing current state with self.object_grasped - obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) - self._last_action = real_actions - - # Update grasp state after step (for next step's comparison) - updated_states = self.handler.get_states(mode="tensor") - old_grasped = self.object_grasped.clone() - self.object_grasped = self._compute_grasp_state(updated_states) - - newly_grasped = self.object_grasped & (~old_grasped) - newly_released = (~self.object_grasped) & old_grasped - - if newly_grasped.any() and newly_grasped[0]: - log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_grasped] = True - - if newly_released.any() and newly_released[0]: - log.info(f"[Env 0] Object released! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_released] = False - - # Terminate episode if object is released - terminated = terminated | newly_released - - # Track lift state: check if joint2 has been lifted significantly - lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - if self.joint2_index is not None and self.initial_joint_pos is not None: - current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] - initial_joint2 = self.initial_joint_pos[:, self.joint2_index] - # Lift is active if joint2 has moved up significantly (more than 0.1 radians) - lift_active = (current_joint2 - initial_joint2) > 0.1 - - info["grasp_success"] = self.object_grasped - info["lift_active"] = lift_active - info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) - - return obs, reward, terminated, time_out, info - - def _apply_simple_gripper_control(self, actions, gripper_box_dist): - """Simple gripper control: close when near object.""" - # Close gripper when close to object - gripper_close = gripper_box_dist < self.gripper_close_distance - gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed - gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open - - # Set gripper joints - for gripper_idx in self.gripper_joint_indices: - actions[:, gripper_idx] = torch.where( - gripper_close, - gripper_value_close, - gripper_value_open, - ) - - return actions - - def _apply_joint2_lift_control(self, actions, current_states): - """Apply joint2 lift control when object is grasped.""" - if self.initial_joint_pos is None: - self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() - - joint_pos = current_states.robots[self.robot_name].joint_pos - joint2_idx = self.joint2_index - - # Target 
position: initial position + lift offset (positive offset lifts up) - target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET - joint_error = target_lift - joint_pos[:, joint2_idx] - - # Apply proportional control with max delta limit - desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error - delta = torch.clamp( - desired - joint_pos[:, joint2_idx], - -self.JOINT2_LIFT_MAX_DELTA, - self.JOINT2_LIFT_MAX_DELTA, - ) - joint2_value = torch.clamp( - joint_pos[:, joint2_idx] + delta, - self._action_low[joint2_idx], - self._action_high[joint2_idx], - ) - - # Apply lift control only to environments where object is grasped - actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] - - return actions - - def _compute_grasp_state(self, states): - """Compute if object is grasped (requires 5 stable frames based on distance only).""" - box_pos = states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = self._get_ee_state(states) - gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) - - # Update rolling distance history - if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) - self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) - self._distance_history[:, -1] = gripper_box_dist - - # Object is grasped if distance has been stable (close) for 5 frames - stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) - is_grasping = stable_grasp - - return is_grasping - - def _reward_gripper_approach(self, env_states) -> torch.Tensor: - """Reward for gripper approaching the box.""" - box_pos = env_states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = self._get_ee_state(env_states) - gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) - - approach_reward_far = 1 - torch.tanh(gripper_box_dist) - approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) - return approach_reward_far + approach_reward_near - - def _reward_grasp(self, env_states) -> torch.Tensor: - """Reward for maintaining grasp state (continuous reward while grasped).""" - # Use cached grasp state (computed in step method) - return self.object_grasped.float() - - def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: - """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" - _, gripper_quat = self._get_ee_state(env_states) - gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) - gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame - - down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) - alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z - # Map cosine (1 best, -1 worst) to [0,1] reward - return (alignment + 1.0) / 2.0 - - def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments.""" - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.400000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as 
the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), }, - "lamp": { - "pos": torch.tensor([0.680000, 0.310000, 1.050000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "robots": { + "franka": saved_poses["robots"]["franka"], }, - "basket": { - "pos": torch.tensor([0.220000, 0.200000, 0.955000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "bowl": { - "pos": torch.tensor([0.620000, -0.080000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "screw_driver": { - "pos": torch.tensor([0.530000, -0.410000, 0.811000]), - "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), - }, - "spoon": { - "pos": torch.tensor([0.530000, -0.690000, 0.850000]), - "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), - }, - "object": { - "pos": torch.tensor([0.280000, -0.280000, 0.825000]), - "rot": torch.tensor([0.889292, -0.000000, 0.000001, -0.457338]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.260000, -0.240000, 0.850000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.270000, -0.190000, 0.970000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.260000, -0.140000, 1.160000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.250000, -0.030000, 1.240000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.310000, 0.190000, 1.160000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.220000, 0.200000, 0.955000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.530000, -0.410000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.280000, 0.825000]), + "rot": torch.tensor([0.889292, -0.000000, 0.000001, -0.457338]), + }, + "spoon": { + "pos": torch.tensor([0.530000, 
-0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.260000, -0.240000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.270000, -0.190000, 0.970000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.260000, -0.140000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.250000, -0.030000, 1.240000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.310000, 0.190000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([0.475731, -0.000000, -0.000001, 0.879590]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.475731, -0.000000, -0.000001, 0.879590]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, }, }, - }, - } - for _ in range(self.num_envs) - ] + } + for _ in range(self.num_envs) + ] return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index d72a14d50..b30f5e61f 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -1,12 +1,13 @@ -"""Stage 1: Simple Approach and Grasp task with gripper control. +"""Stage 1: Simple Approach and Grasp task for screwdriver object. -This task focuses on learning to approach the object, grasp it with gripper, and lift it. -Simple gripper control: close when near the object. +This task inherits from PickPlaceApproachGraspSimple and customizes it for the screwdriver object +with specific mesh configurations and saved poses from object_layout.py. """ from __future__ import annotations -from copy import deepcopy +import os +import importlib.util import torch from loguru import logger as log @@ -15,40 +16,18 @@ from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task -from metasim.utils.math import matrix_from_quat -from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_screwdriver", "pick_place_approach_screwdriver") -class PickPlaceApproachGraspScrewDriver(PickPlaceBase): - """Simple Approach and Grasp task with gripper control. 
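Both the banana and screwdriver variants load their scene layout by executing a saved-poses Python file at runtime via importlib. As a minimal sketch of that pattern (the `load_poses` helper name and the commented example path are illustrative, not part of this patch):

```python
import importlib.util


def load_poses(path: str) -> dict:
    """Execute a standalone Python file and return its module-level `poses` dict."""
    spec = importlib.util.spec_from_file_location("saved_poses", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # runs the file; it is expected to define `poses`
    return module.poses


# poses = load_poses("get_started/output/saved_poses_20251206_screwdriver_basket.py")
```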
+@register_task("pick_place.approach_grasp_simple_screwdriver", "pick_place_approach_grasp_simple_screwdriver") +class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for screwdriver object. - This task focuses on: - - Approaching the object - - Grasping the object with simple gripper control (close when near) - - Success condition: Object is grasped (reward given when entering grasp state). - Episode terminates if object is released. + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses screwdriver mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_screwdriver_basket.py """ - GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing - GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp - - # Joint2 lift parameters (for franka: panda_joint2) - JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) - JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control - JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step - - DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) - DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ - "gripper_approach": 0.5, - "grasp_reward": 4.0, - }) - DEFAULT_CONFIG_SIMPLE["grasp_config"] = { - "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, - "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, - } - scenario = ScenarioCfg( objects=[ RigidObjCfg( @@ -91,6 +70,7 @@ class PickPlaceApproachGraspScrewDriver(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", ), + # Use actual screwdriver mesh from EmbodiedGenData (matches object_layout.py) RigidObjCfg( name="object", scale=(1.5, 1.5, 1.5), @@ -172,309 +152,129 @@ class PickPlaceApproachGraspScrewDriver(PickPlaceBase): ) max_episode_steps = 200 - def __init__(self, scenario, device=None): - # Placeholders needed during super().__init__ (reset may be called there) - self.object_grasped = None - self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 - self._grasp_notified = None - self._distance_history = None # History buffer for stable grasp check - self.joint2_name = "panda_joint2" - self.joint2_index = None - self.initial_joint_pos = None - - super().__init__(scenario, device) - - # Override reward functions for this task - self.reward_functions = [ - self._reward_gripper_approach, - self._reward_grasp, - self._reward_gripper_downward_alignment, - ] - self.reward_weights = [ - self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], - self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], - 0.1, # weight for keeping the gripper pointing downward - ] - - # Get config values - grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] - self.grasp_check_distance = grasp_config["grasp_check_distance"] - self.gripper_close_distance = grasp_config["gripper_close_distance"] - - # Initialize tracking buffers - self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, + def 
_get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads screwdriver, table, basket, and trajectory markers + from saved_poses_20251206_screwdriver_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", "output", "saved_poses_20251206_screwdriver_basket.py" ) - - # Find joint2 index - joint_names = self.handler.get_joint_names(self.robot_name, sort=True) - if self.joint2_name in joint_names: - self.joint2_index = joint_names.index(self.joint2_name) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses else: - log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") - - def reset(self, env_ids=None): - """Reset environment and tracking variables.""" - obs, info = super().reset(env_ids=env_ids) - - if env_ids is None: - env_ids_tensor = torch.arange(self.num_envs, device=self.device) - else: - env_ids_tensor = ( - torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids - ) - - # Reset grasp tracking - self.object_grasped[env_ids_tensor] = False - if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: - self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) - else: - self._grasp_notified[env_ids_tensor] = False - self._distance_history[env_ids_tensor] = float("inf") - - # Store initial joint positions if not already stored - if self.initial_joint_pos is None: - states = self.handler.get_states(mode="tensor") - self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() - - return obs, info - - def step(self, actions): - """Step with delta control and simple gripper control.""" - current_states = self.handler.get_states(mode="tensor") - box_pos = current_states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = self._get_ee_state(current_states) - gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) - - # Apply delta control - delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions - real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - - # Simple gripper control: close when near object - real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) - - # Apply joint2 lift control if grasped - if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: - real_actions = self._apply_joint2_lift_control(real_actions, current_states) - - # Bypass PickPlaceBase.step to avoid its gripper control logic - # Call RLTaskEnv.step directly - # Note: reward functions will be called inside super().step() - # and they will compute newly_grasped by comparing current state with self.object_grasped - obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) - self._last_action = real_actions - - # Update grasp state after step (for next step's comparison) - updated_states = self.handler.get_states(mode="tensor") - old_grasped = 
self.object_grasped.clone() - self.object_grasped = self._compute_grasp_state(updated_states) - - newly_grasped = self.object_grasped & (~old_grasped) - newly_released = (~self.object_grasped) & old_grasped - - if newly_grasped.any() and newly_grasped[0]: - log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_grasped] = True - - if newly_released.any() and newly_released[0]: - log.info(f"[Env 0] Object released! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_released] = False - - # Terminate episode if object is released - terminated = terminated | newly_released - - # Track lift state: check if joint2 has been lifted significantly - lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - if self.joint2_index is not None and self.initial_joint_pos is not None: - current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] - initial_joint2 = self.initial_joint_pos[:, self.joint2_index] - # Lift is active if joint2 has moved up significantly (more than 0.1 radians) - lift_active = (current_joint2 - initial_joint2) > 0.1 - - info["grasp_success"] = self.object_grasped - info["lift_active"] = lift_active - info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) - - return obs, reward, terminated, time_out, info - - def _apply_simple_gripper_control(self, actions, gripper_box_dist): - """Simple gripper control: close when near object.""" - # Close gripper when close to object - gripper_close = gripper_box_dist < self.gripper_close_distance - gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed - gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open - - # Set gripper joints - for gripper_idx in self.gripper_joint_indices: - actions[:, gripper_idx] = torch.where( - gripper_close, - gripper_value_close, - gripper_value_open, - ) - - return actions - - def _apply_joint2_lift_control(self, actions, current_states): - """Apply joint2 lift control when object is grasped.""" - if self.initial_joint_pos is None: - self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() - - joint_pos = current_states.robots[self.robot_name].joint_pos - joint2_idx = self.joint2_index - - # Target position: initial position + lift offset (positive offset lifts up) - target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET - joint_error = target_lift - joint_pos[:, joint2_idx] - - # Apply proportional control with max delta limit - desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error - delta = torch.clamp( - desired - joint_pos[:, joint2_idx], - -self.JOINT2_LIFT_MAX_DELTA, - self.JOINT2_LIFT_MAX_DELTA, - ) - joint2_value = torch.clamp( - joint_pos[:, joint2_idx] + delta, - self._action_low[joint2_idx], - self._action_high[joint2_idx], - ) - - # Apply lift control only to environments where object is grasped - actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] - - return actions - - def _compute_grasp_state(self, states): - """Compute if object is grasped (requires 5 stable frames based on distance only).""" - box_pos = states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = self._get_ee_state(states) - gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) - - # Update rolling distance history - if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: - 
self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) - self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) - self._distance_history[:, -1] = gripper_box_dist - - # Object is grasped if distance has been stable (close) for 5 frames - stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) - is_grasping = stable_grasp - - return is_grasping - - def _reward_gripper_approach(self, env_states) -> torch.Tensor: - """Reward for gripper approaching the box.""" - box_pos = env_states.objects["object"].root_state[:, 0:3] - gripper_pos, _ = self._get_ee_state(env_states) - gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) - - approach_reward_far = 1 - torch.tanh(gripper_box_dist) - approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) - return approach_reward_far + approach_reward_near - - def _reward_grasp(self, env_states) -> torch.Tensor: - """Reward for maintaining grasp state (continuous reward while grasped).""" - # Use cached grasp state (computed in step method) - return self.object_grasped.float() - - def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: - """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" - _, gripper_quat = self._get_ee_state(env_states) - gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) - gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame - - down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) - alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z - # Map cosine (1 best, -1 worst) to [0,1] reward - return (alignment + 1.0) / 2.0 - - def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments.""" - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.400000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "lamp": { - "pos": torch.tensor([0.680000, 0.310000, 1.050000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.280000, 0.130000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "bowl": { - "pos": torch.tensor([0.620000, -0.080000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.640000, -0.320000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Screwdriver as the object to pick + "object": saved_poses["objects"].get("screwdriver", saved_poses["objects"].get("screw_driver", saved_poses["objects"].get("object"))), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": 
saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), }, - "object": { - "pos": torch.tensor([0.320000, -0.340000, 0.811000]), - "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + "robots": { + "franka": saved_poses["robots"]["franka"], }, - "spoon": { - "pos": torch.tensor([0.530000, -0.690000, 0.850000]), - "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.350000, -0.310000, 0.870000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.330000, -0.290000, 0.980000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.340000, -0.200000, 1.090000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.330000, -0.040000, 1.180000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.310000, 0.150000, 1.160000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_screwdriver.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.280000, 0.130000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.640000, -0.320000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "object": { + "pos": torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.350000, -0.310000, 0.870000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.330000, -0.290000, 0.980000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.200000, 1.090000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.330000, -0.040000, 1.180000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.310000, 0.150000, 1.160000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - 
"panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, }, }, - }, - } - for _ in range(self.num_envs) - ] + } + for _ in range(self.num_envs) + ] return init From 849ce1de9f805133be0b0f42c91cc3a6b99ff53f Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Thu, 11 Dec 2025 18:19:50 -0800 Subject: [PATCH 13/37] "Refactor pick_place grasp logic and add object configs" --- roboverse_pack/tasks/pick_place/approach_grasp.py | 2 +- .../tasks/pick_place/approach_grasp_banana.py | 10 ++++++---- .../pick_place/approach_grasp_screwdriver.py | 15 ++++++++++----- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py index 4a3b0ff02..9ab913675 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp.py @@ -418,4 +418,4 @@ def _get_initial_states(self) -> list[dict] | None: for _ in range(self.num_envs) ] - return init \ No newline at end of file + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py index a66939722..ae7b17967 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -6,8 +6,8 @@ from __future__ import annotations -import os import importlib.util +import os import torch from loguru import logger as log @@ -154,14 +154,16 @@ class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple): def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers from saved_poses_20251206_banana_basket.py. 
""" # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251206_banana_basket.py" + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -173,7 +175,7 @@ def _get_initial_states(self) -> list[dict] | None: # Fallback to default poses if saved file not found log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") saved_poses = None - + if saved_poses is not None: # Use saved poses from object_layout.py init = [] diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index b30f5e61f..e55d5bd3b 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -6,8 +6,8 @@ from __future__ import annotations -import os import importlib.util +import os import torch from loguru import logger as log @@ -154,14 +154,16 @@ class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - + Uses saved poses from object_layout.py. Loads screwdriver, table, basket, and trajectory markers from saved_poses_20251206_screwdriver_basket.py. """ # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251206_screwdriver_basket.py" + "get_started", + "output", + "saved_poses_20251206_screwdriver_basket.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -173,7 +175,7 @@ def _get_initial_states(self) -> list[dict] | None: # Fallback to default poses if saved file not found log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") saved_poses = None - + if saved_poses is not None: # Use saved poses from object_layout.py init = [] @@ -181,7 +183,10 @@ def _get_initial_states(self) -> list[dict] | None: env_state = { "objects": { # Screwdriver as the object to pick - "object": saved_poses["objects"].get("screwdriver", saved_poses["objects"].get("screw_driver", saved_poses["objects"].get("object"))), + "object": saved_poses["objects"].get( + "screwdriver", + saved_poses["objects"].get("screw_driver", saved_poses["objects"].get("object")), + ), "table": saved_poses["objects"]["table"], "lamp": saved_poses["objects"].get("lamp"), "basket": saved_poses["objects"].get("basket"), From 6b49480244237b841ec35f65add2040010ee213e Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Thu, 11 Dec 2025 18:20:35 -0800 Subject: [PATCH 14/37] Refactor pick_place grasp logic and add object configs --- CONTRIBUTORS.md | 1 + get_started/obj_layout/object_layout.py | 2 +- .../rl/fast_td3/configs/pick_place_bowl.yaml | 1 - .../configs/pick_place_cuttingtool.yaml | 1 - .../rl/fast_td3/configs/pick_place_spoon.yaml | 1 - .../rl/fast_td3/configs/track_spoon.yaml | 1 - roboverse_learn/rl/fast_td3/train.py | 2 +- .../tasks/pick_place/approach_grasp_bowl.py | 56 +++++----- .../pick_place/approach_grasp_cuttingtool.py | 39 ++++--- .../tasks/pick_place/approach_grasp_spoon.py | 14 ++- roboverse_pack/tasks/pick_place/base.py | 21 ++-- .../tasks/pick_place/track_banana.py | 2 +- .../tasks/pick_place/track_screwdriver.py | 2 +- .../tasks/pick_place/track_spoon.py | 100 
++++++++++++------ 14 files changed, 142 insertions(+), 101 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 528f39029..680c8e8d3 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -48,6 +48,7 @@ Guidelines for modifications: * Yuyang Li * Zhigen Zhao * Hanchu Zhou +* Boqi Zhao ## Acknowledgements diff --git a/get_started/obj_layout/object_layout.py b/get_started/obj_layout/object_layout.py index 21566b0a4..7487ffb53 100644 --- a/get_started/obj_layout/object_layout.py +++ b/get_started/obj_layout/object_layout.py @@ -803,4 +803,4 @@ def __post_init__(self): keyboard_client.close() handler.close() - log.info(f"Done (steps: {step})") \ No newline at end of file + log.info(f"Done (steps: {step})") diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml index fbea3645c..5b73f22c4 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_bowl.yaml @@ -87,4 +87,3 @@ eval_interval: 5000 save_interval: 5000 video_width: 1024 video_height: 1024 - diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml index 1e4db8bf8..a39ca92ba 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool.yaml @@ -87,4 +87,3 @@ eval_interval: 5000 save_interval: 5000 video_width: 1024 video_height: 1024 - diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml index 49dd350b9..fcde5d7e7 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml @@ -87,4 +87,3 @@ eval_interval: 5000 save_interval: 5000 video_width: 1024 video_height: 1024 - diff --git a/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml b/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml index 7348760a7..f92bd84e1 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_spoon.yaml @@ -95,4 +95,3 @@ eval_interval: 5000 save_interval: 5000 video_width: 1024 video_height: 1024 - diff --git a/roboverse_learn/rl/fast_td3/train.py b/roboverse_learn/rl/fast_td3/train.py index 347b3ca2e..5effe6e06 100644 --- a/roboverse_learn/rl/fast_td3/train.py +++ b/roboverse_learn/rl/fast_td3/train.py @@ -172,7 +172,7 @@ def main() -> None: # Check if task class accepts state_file_path parameter (only track tasks do) init_signature = inspect.signature(task_cls.__init__) accepts_state_file_path = "state_file_path" in init_signature.parameters - + # Pass state_file_path from config if task accepts it (for track tasks) state_file_path = cfg("state_file_path", None) if accepts_state_file_path and state_file_path is not None: diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py index 2767a4eec..0b7947d41 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py @@ -6,9 +6,8 @@ from __future__ import annotations -import os import importlib.util -from copy import deepcopy +import os import torch from loguru import logger as log @@ -17,7 +16,7 @@ from metasim.scenario.objects import RigidObjCfg from metasim.scenario.scenario import ScenarioCfg, SimParamCfg from metasim.task.registry import register_task -from 
metasim.utils.math import matrix_from_quat, quat_apply +from metasim.utils.math import quat_apply from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple from roboverse_pack.tasks.pick_place.base import PickPlaceBase @@ -128,7 +127,7 @@ class PickPlaceApproachGraspSimpleBowl(PickPlaceApproachGraspSimple): def __init__(self, scenario, device=None): super().__init__(scenario, device) - + # Grasp point offset for bowl: point to the rim/edge instead of center # Bowl coordinate frame: (x, y) at center of bowl, z is height (vertical) # The bowl's origin is at the center/bottom, but we want to grasp at the rim @@ -139,57 +138,59 @@ def __init__(self, scenario, device=None): # Offset format: [x, y, z] in object's local frame # [0.099, 0.0, 0.1] = 9.9cm radius in x (matches edge at z=0.1m), 0 in y, 10cm upward in z self.grasp_point_offset_local = torch.tensor([0.099, 0.0, 0.1]) # [x, y, z] in object frame - + # Note: gripper orientation reward is now inherited from base class (approach_grasp.py) # No need to override reward functions here - + def _get_grasp_point(self, states): """Get the grasp point for the bowl (rim/edge) instead of center. - + Bowl coordinate frame convention: - (x, y) plane is at the center of the bowl - z-axis is the height of the bowl (vertical, pointing up) - Origin is at the center/bottom of the bowl - + The offset [0.099, 0.0, 0.1] points to the rim edge: - 0.099m in x direction (radius from center, matches actual edge radius at z=0.1m) - 0.0m in y direction (grasping at a specific point on rim) - 0.1m in z direction (upward, at 10cm height from bottom) - + Args: states: Environment states - + Returns: grasp_point: (B, 3) grasp point in world coordinates (rim of bowl) """ # Get object center position and rotation box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of bowl box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - bowl orientation - + # Move offset to correct device and expand to batch size - offset_local = self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) # (B, 3) - + offset_local = ( + self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) + ) # (B, 3) + # Transform grasp point offset from object local frame to world frame # quat_apply rotates a vector by a quaternion # This accounts for the bowl's rotation in the world offset_world = quat_apply(box_quat, offset_local) # (B, 3) - + # Add offset to object center to get grasp point (rim position) grasp_point = box_pos + offset_world # (B, 3) - + return grasp_point - + def step(self, actions): """Step with delta control and simple gripper control, using bowl rim as grasp point.""" current_states = self.handler.get_states(mode="tensor") - + # Use grasp point (bowl rim) instead of object center grasp_point = self._get_grasp_point(current_states) # (B, 3) gripper_pos, _ = self._get_ee_state(current_states) - + # Calculate 3D Euclidean distance between gripper and grasp point (rim) gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - + # Calculate z-axis distance (vertical distance) delta = gripper_pos - grasp_point dist_z = torch.abs(delta[:, 2]) @@ -242,7 +243,7 @@ def step(self, actions): info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) return obs, reward, terminated, time_out, info - + def _compute_grasp_state(self, states): """Compute if object is grasped, using bowl rim as grasp point.""" # Use grasp point (bowl rim) 
instead of object center @@ -265,7 +266,7 @@ def _compute_grasp_state(self, states): is_grasping = stable_grasp return is_grasping - + def _reward_gripper_approach(self, env_states) -> torch.Tensor: """Reward for gripper approaching the bowl rim.""" # Use grasp point (bowl rim) instead of object center @@ -279,14 +280,16 @@ def _reward_gripper_approach(self, env_states) -> torch.Tensor: def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - + Uses saved poses from object_layout.py. Loads bowl, table, basket, and trajectory markers from saved_poses_20251208_bowl_basket.py. """ # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251208_bowl_basket.py" + "get_started", + "output", + "saved_poses_20251208_bowl_basket.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -298,7 +301,7 @@ def _get_initial_states(self) -> list[dict] | None: # Fallback to default poses if saved file not found log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") saved_poses = None - + if saved_poses is not None: # Use saved poses, mapping bowl to "object" init = [] @@ -309,7 +312,9 @@ def _get_initial_states(self) -> list[dict] | None: "object": saved_poses["objects"]["bowl"], "table": saved_poses["objects"]["table"], # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + "basket": saved_poses["objects"].get( + "basket", saved_poses["objects"]["table"] + ), # Fallback to table if basket not found # Include trajectory markers if present "traj_marker_0": saved_poses["objects"]["traj_marker_0"], "traj_marker_1": saved_poses["objects"]["traj_marker_1"], @@ -379,4 +384,3 @@ def _get_initial_states(self) -> list[dict] | None: ] return init - diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py index 20185c506..3d1333314 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py @@ -6,8 +6,8 @@ from __future__ import annotations -import os import importlib.util +import os import torch from loguru import logger as log @@ -127,7 +127,7 @@ class PickPlaceApproachGraspSimpleCuttingTool(PickPlaceApproachGraspSimple): def __init__(self, scenario, device=None): super().__init__(scenario, device) - + # Grasp point offset for cuttingtool: point to the handle center instead of object center # Cutting tools typically have handles at one end. The offset is in the object's local frame. # Adjust these values based on your cuttingtool mesh: @@ -139,41 +139,43 @@ def __init__(self, scenario, device=None): def _get_grasp_point(self, states): """Get the grasp point for the cuttingtool (handle center) instead of object center. 
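
        The offset lives in the object's local frame and is rotated into the
        world frame before being added to the object position. A minimal sketch
        of the transform performed below (quat_apply rotates batched vectors by
        wxyz quaternions):

            offset_world = quat_apply(box_quat, offset_local)  # (B, 3), local -> world
            grasp_point = box_pos + offset_world               # (B, 3), handle point in world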
- + Args: states: Environment states - + Returns: grasp_point: (B, 3) grasp point in world coordinates (handle center) """ # Get object center position and rotation box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of object box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - object orientation - + # Move offset to correct device and expand to batch size - offset_local = self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) # (B, 3) - + offset_local = ( + self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) + ) # (B, 3) + # Transform grasp point offset from object local frame to world frame # quat_apply rotates a vector by a quaternion # This accounts for the object's rotation in the world offset_world = quat_apply(box_quat, offset_local) # (B, 3) - + # Add offset to object center to get grasp point (handle position) grasp_point = box_pos + offset_world # (B, 3) - + return grasp_point def step(self, actions): """Step with delta control and simple gripper control, using handle center as grasp point.""" current_states = self.handler.get_states(mode="tensor") - + # Use grasp point (handle center) instead of object center grasp_point = self._get_grasp_point(current_states) # (B, 3) gripper_pos, _ = self._get_ee_state(current_states) - + # Calculate 3D Euclidean distance between gripper and grasp point (handle) gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - + # Apply delta control delta_actions = actions * self._action_scale new_actions = self._last_action + delta_actions @@ -246,14 +248,16 @@ def _compute_grasp_state(self, states): def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - + Uses saved poses from object_layout.py. Loads cutting_tools, table, basket, and trajectory markers from saved_poses_20251210_cuttingtool.py. 
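
        The saved file is expected to define a module-level ``poses`` dict whose
        entries mirror the fallback defaults below (a sketch only; values are
        abbreviated from those defaults):

            poses = {
                "objects": {
                    # "rot" is a wxyz quaternion, as in the defaults below
                    "cutting_tools": {"pos": torch.tensor([0.64, -0.32, 0.82]),
                                      "rot": torch.tensor([0.93, 0.0, 0.0, 0.37])},
                    # plus "table", "basket", and "traj_marker_0" .. "traj_marker_4"
                },
                "robots": {
                    "franka": {"pos": torch.tensor([0.8, -0.8, 0.78]),
                               "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]),
                               "dof_pos": {"panda_joint1": 0.0}},  # one entry per joint
                },
            }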
""" # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251210_cuttingtool.py" + "get_started", + "output", + "saved_poses_20251210_cuttingtool.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -265,7 +269,7 @@ def _get_initial_states(self) -> list[dict] | None: # Fallback to default poses if saved file not found log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") saved_poses = None - + if saved_poses is not None: # Use saved poses from object_layout.py init = [] @@ -276,7 +280,9 @@ def _get_initial_states(self) -> list[dict] | None: "object": saved_poses["objects"]["cutting_tools"], "table": saved_poses["objects"]["table"], # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + "basket": saved_poses["objects"].get( + "basket", saved_poses["objects"]["table"] + ), # Fallback to table if basket not found # Include trajectory markers if present "traj_marker_0": saved_poses["objects"]["traj_marker_0"], "traj_marker_1": saved_poses["objects"]["traj_marker_1"], @@ -346,4 +352,3 @@ def _get_initial_states(self) -> list[dict] | None: ] return init - diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py index 39bc93fc0..5b6cfdb14 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py @@ -6,8 +6,8 @@ from __future__ import annotations -import os import importlib.util +import os import torch from loguru import logger as log @@ -125,14 +125,16 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - + Uses saved poses from object_layout.py. Loads spoon, table, basket, and trajectory markers from saved_poses_20251206_spoon_basket.py. 
""" # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251206_spoon_basket.py" + "get_started", + "output", + "saved_poses_20251206_spoon_basket.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -144,7 +146,7 @@ def _get_initial_states(self) -> list[dict] | None: # Fallback to default poses if saved file not found log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") saved_poses = None - + if saved_poses is not None: # Use saved poses from object_layout.py init = [] @@ -155,7 +157,9 @@ def _get_initial_states(self) -> list[dict] | None: "object": saved_poses["objects"]["spoon"], "table": saved_poses["objects"]["table"], # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get("basket", saved_poses["objects"]["table"]), # Fallback to table if basket not found + "basket": saved_poses["objects"].get( + "basket", saved_poses["objects"]["table"] + ), # Fallback to table if basket not found # Include trajectory markers if present "traj_marker_0": saved_poses["objects"]["traj_marker_0"], "traj_marker_1": saved_poses["objects"]["traj_marker_1"], diff --git a/roboverse_pack/tasks/pick_place/base.py b/roboverse_pack/tasks/pick_place/base.py index b80bed175..ba472f47f 100644 --- a/roboverse_pack/tasks/pick_place/base.py +++ b/roboverse_pack/tasks/pick_place/base.py @@ -339,14 +339,17 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Progress reward: reward for reducing distance to waypoint # This encourages the agent to make progress toward the current waypoint # Only reward progress if we haven't reached this waypoint yet - not_already_reached = ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + not_already_reached = ~self.waypoints_reached[ + torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx + ] distance_reduction = self.prev_distance_to_waypoint - distance # Scale progress reward: 0.1 means progress reward is 10% of waypoint reached bonus # This provides continuous guidance without overwhelming the sparse reward - progress_reward_component = torch.clamp( - distance_reduction * self.w_tracking_progress * 0.1, - min=0.0 - ) * not_already_reached.float() * grasped_mask.float() + progress_reward_component = ( + torch.clamp(distance_reduction * self.w_tracking_progress * 0.1, min=0.0) + * not_already_reached.float() + * grasped_mask.float() + ) # Distance-based reward (far + near) / 2 distance_reward_far = 1 - torch.tanh(1.0 * distance) @@ -373,8 +376,8 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: newly_reached = reached & not_already_reached # Waypoint reached bonus (one-time large reward when reaching a waypoint) waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress - - # Update prev_distance for next step (only if not newly reached, + + # Update prev_distance for next step (only if not newly reached, # because if newly reached, we'll update it when advancing to next waypoint) self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] @@ -422,9 +425,7 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Combine all reward components: approach reward + progress reward + waypoint reached bonus tracking_reward = torch.where( - all_reached, - maintain_reward, - approach_reward + progress_reward_component + 
waypoint_reached_bonus + all_reached, maintain_reward, approach_reward + progress_reward_component + waypoint_reached_bonus ) return tracking_reward diff --git a/roboverse_pack/tasks/pick_place/track_banana.py b/roboverse_pack/tasks/pick_place/track_banana.py index 4f74fc447..1213d64f8 100644 --- a/roboverse_pack/tasks/pick_place/track_banana.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -415,7 +415,7 @@ def step(self, actions): delta_actions = actions * self._action_scale new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - + # delta_actions = actions * self._action_scale # new_actions = self._last_action + delta_actions diff --git a/roboverse_pack/tasks/pick_place/track_screwdriver.py b/roboverse_pack/tasks/pick_place/track_screwdriver.py index de46689c9..37d59fbbc 100644 --- a/roboverse_pack/tasks/pick_place/track_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/track_screwdriver.py @@ -416,7 +416,7 @@ def step(self, actions): delta_actions = actions * self._action_scale new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - + # delta_actions = actions * self._action_scale # new_actions = self._last_action + delta_actions diff --git a/roboverse_pack/tasks/pick_place/track_spoon.py b/roboverse_pack/tasks/pick_place/track_spoon.py index b195dd25e..855264147 100644 --- a/roboverse_pack/tasks/pick_place/track_spoon.py +++ b/roboverse_pack/tasks/pick_place/track_spoon.py @@ -8,7 +8,6 @@ import os import pickle -import importlib.util from copy import deepcopy import numpy as np @@ -139,7 +138,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 # Increase reach threshold for spoon (more lenient for higher waypoints) DEFAULT_CONFIG_TRACK["trajectory_tracking"]["reach_threshold"] = 0.08 # Increased from 0.05 to 0.08m - + @register_task("pick_place.track_spoon", "pick_place_track_spoon") class PickPlaceTrackSpoon(PickPlaceBase): @@ -308,38 +307,41 @@ def _get_initial_states(self) -> list[dict] | None: # Load trajectory marker positions from saved poses file (spoon-specific enhancement) saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", "output", "saved_poses_20251206_spoon_basket.py" + "get_started", + "output", + "saved_poses_20251206_spoon_basket.py", ) - + saved_traj_markers = None saved_table = None saved_basket = None - + if os.path.exists(saved_poses_path): try: import importlib.util + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) saved_poses_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(saved_poses_module) saved_poses = saved_poses_module.poses - + # Extract trajectory markers saved_traj_markers = {} for i in range(self.num_waypoints): marker_name = f"traj_marker_{i}" if marker_name in saved_poses["objects"]: saved_traj_markers[marker_name] = saved_poses["objects"][marker_name] - + # Extract table and basket if present if "table" in saved_poses["objects"]: saved_table = saved_poses["objects"]["table"] if "basket" in saved_poses["objects"]: saved_basket = saved_poses["objects"]["basket"] - + log.info(f"Loaded trajectory markers from {saved_poses_path}") except Exception as e: log.warning(f"Failed to load saved poses from {saved_poses_path}: {e}") - + # Default waypoint positions (fallback if saved 
poses not available) default_positions = [ torch.tensor([0.610000, -0.280000, 0.150000], device=device), @@ -359,7 +361,7 @@ def _get_initial_states(self) -> list[dict] | None: for env_idx, initial_state in enumerate(initial_states): if "objects" not in initial_state: initial_state["objects"] = {} - + # Force gripper to be fully closed for proper grasping if "robots" in initial_state and robot_name in initial_state["robots"]: robot_state = initial_state["robots"][robot_name] @@ -370,17 +372,35 @@ def _get_initial_states(self) -> list[dict] | None: dof_pos["panda_finger_joint1"] = 0.0 if "panda_finger_joint2" in dof_pos: dof_pos["panda_finger_joint2"] = 0.0 - log.debug(f"[Env {env_idx}] Forced gripper closed: finger1={dof_pos.get('panda_finger_joint1', 'N/A')}, finger2={dof_pos.get('panda_finger_joint2', 'N/A')}") - + log.debug( + f"[Env {env_idx}] Forced gripper closed: finger1={dof_pos.get('panda_finger_joint1', 'N/A')}, finger2={dof_pos.get('panda_finger_joint2', 'N/A')}" + ) + # Update table position from saved poses if available if saved_table is not None and "table" in initial_state["objects"]: - initial_state["objects"]["table"]["pos"] = saved_table["pos"].to(device) if isinstance(saved_table["pos"], torch.Tensor) else torch.tensor(saved_table["pos"], device=device) - initial_state["objects"]["table"]["rot"] = saved_table["rot"].to(device) if isinstance(saved_table["rot"], torch.Tensor) else torch.tensor(saved_table["rot"], device=device) - + initial_state["objects"]["table"]["pos"] = ( + saved_table["pos"].to(device) + if isinstance(saved_table["pos"], torch.Tensor) + else torch.tensor(saved_table["pos"], device=device) + ) + initial_state["objects"]["table"]["rot"] = ( + saved_table["rot"].to(device) + if isinstance(saved_table["rot"], torch.Tensor) + else torch.tensor(saved_table["rot"], device=device) + ) + # Add basket from saved poses if available if saved_basket is not None: - basket_pos = saved_basket["pos"].to(device) if isinstance(saved_basket["pos"], torch.Tensor) else torch.tensor(saved_basket["pos"], device=device) - basket_rot = saved_basket["rot"].to(device) if isinstance(saved_basket["rot"], torch.Tensor) else torch.tensor(saved_basket["rot"], device=device) + basket_pos = ( + saved_basket["pos"].to(device) + if isinstance(saved_basket["pos"], torch.Tensor) + else torch.tensor(saved_basket["pos"], device=device) + ) + basket_rot = ( + saved_basket["rot"].to(device) + if isinstance(saved_basket["rot"], torch.Tensor) + else torch.tensor(saved_basket["rot"], device=device) + ) initial_state["objects"]["basket"] = { "pos": basket_pos, "rot": basket_rot, @@ -393,8 +413,16 @@ def _get_initial_states(self) -> list[dict] | None: if saved_traj_markers is not None and marker_name in saved_traj_markers: # Use saved marker position marker_data = saved_traj_markers[marker_name] - marker_pos = marker_data["pos"].to(device) if isinstance(marker_data["pos"], torch.Tensor) else torch.tensor(marker_data["pos"], device=device) - marker_rot = marker_data["rot"].to(device) if isinstance(marker_data["rot"], torch.Tensor) else torch.tensor(marker_data["rot"], device=device) + marker_pos = ( + marker_data["pos"].to(device) + if isinstance(marker_data["pos"], torch.Tensor) + else torch.tensor(marker_data["pos"], device=device) + ) + marker_rot = ( + marker_data["rot"].to(device) + if isinstance(marker_data["rot"], torch.Tensor) + else torch.tensor(marker_data["rot"], device=device) + ) initial_state["objects"][marker_name] = { "pos": marker_pos, "rot": marker_rot, @@ -460,7 +488,7 @@ def 
step(self, actions): current_joint_pos = self.handler.get_states().robots[self.robot_name].joint_pos delta_actions = actions * self._action_scale new_actions = current_joint_pos + delta_actions - + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) @@ -475,10 +503,10 @@ def step(self, actions): box_pos = updated_states.objects["object"].root_state[:, 0:3] gripper_pos, _ = self._get_ee_state(updated_states) gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) - + gripper_joint_pos = updated_states.robots[self.robot_name].joint_pos[:, :2] gripper_actually_closed = gripper_joint_pos.mean(dim=-1) < 0.02 - + is_grasping = (gripper_box_dist < self.grasp_check_distance) & gripper_actually_closed self.object_grasped = is_grasping newly_released = ~is_grasping @@ -501,7 +529,7 @@ def step(self, actions): def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: """Override reward calculation with increased progress reward for later waypoints. - + This addresses the issue where agent gets stuck after waypoint 2 by: 1. Increasing progress reward multiplier (from 0.1 to 0.3) for better guidance 2. Adding adaptive scaling for later waypoints to encourage progress @@ -515,22 +543,25 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: distance = torch.norm(ee_pos - target_pos, dim=-1) # Progress reward with higher multiplier for better guidance - not_already_reached = ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + not_already_reached = ~self.waypoints_reached[ + torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx + ] distance_reduction = self.prev_distance_to_waypoint - distance - + # Adaptive progress reward: higher for later waypoints (2, 3, 4) to encourage progress # Waypoints 0-1: 0.2 (20%), Waypoints 2+: 0.4 (40%) to overcome larger distances # Create multiplier tensor matching num_envs shape progress_multiplier = torch.where( self.current_waypoint_idx >= 2, torch.full((self.num_envs,), 0.4, device=self.device), - torch.full((self.num_envs,), 0.2, device=self.device) + torch.full((self.num_envs,), 0.2, device=self.device), + ) + + progress_reward_component = ( + torch.clamp(distance_reduction * self.w_tracking_progress * progress_multiplier, min=0.0) + * not_already_reached.float() + * grasped_mask.float() ) - - progress_reward_component = torch.clamp( - distance_reduction * self.w_tracking_progress * progress_multiplier, - min=0.0 - ) * not_already_reached.float() * grasped_mask.float() # Distance-based reward (far + near) / 2 distance_reward_far = 1 - torch.tanh(1.0 * distance) @@ -546,6 +577,7 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: rot_err = None if self.enable_rotation_tracking: from metasim.utils.math import matrix_from_quat + box_quat = env_states.objects["object"].root_state[:, 3:7] box_mat = matrix_from_quat(box_quat).reshape(self.num_envs, 9) target_quat = self.waypoint_rotations[self.current_waypoint_idx] @@ -558,7 +590,7 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: newly_reached = reached & not_already_reached # Waypoint reached bonus (one-time large reward when reaching a waypoint) waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress - + # Update prev_distance for next step self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] @@ -606,9 +638,7 @@ def 
_reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Combine all reward components tracking_reward = torch.where( - all_reached, - maintain_reward, - approach_reward + progress_reward_component + waypoint_reached_bonus + all_reached, maintain_reward, approach_reward + progress_reward_component + waypoint_reached_bonus ) return tracking_reward From cb5635a66e44f4c30f8a509a46a1925f9e56e817 Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Sat, 13 Dec 2025 17:54:20 -0800 Subject: [PATCH 15/37] Adjust Spoon, two spoon class in one file, two spoon yaml. --- get_started/obj_layout/object_layout.py | 64 +-- .../rl/fast_td3/configs/pick_place_spoon.yaml | 2 +- .../fast_td3/configs/pick_place_spoon2.yaml | 90 ++++ .../tasks/pick_place/approach_grasp_bowl.py | 386 ------------------ .../pick_place/approach_grasp_cuttingtool.py | 354 ---------------- .../tasks/pick_place/approach_grasp_spoon.py | 288 ++++++++++--- 6 files changed, 361 insertions(+), 823 deletions(-) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml diff --git a/get_started/obj_layout/object_layout.py b/get_started/obj_layout/object_layout.py index 7487ffb53..b4857d944 100644 --- a/get_started/obj_layout/object_layout.py +++ b/get_started/obj_layout/object_layout.py @@ -426,14 +426,14 @@ def __post_init__(self): # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", # mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", # ), - # RigidObjCfg( - # name="spoon", - # scale=(1, 1, 1), - # physics=PhysicStateType.RIGIDBODY, - # usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", - # urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", - # mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", - # ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), # RigidObjCfg( # name="banana", # scale=(1, 1, 1), @@ -442,22 +442,22 @@ def __post_init__(self): # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", # ), - # RigidObjCfg( - # name="mug", - # scale=(1, 1, 1), - # physics=PhysicStateType.RIGIDBODY, - # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", - # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", - # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", - # ), - # RigidObjCfg( - # name="book", - # scale=(1, 1, 1), - # physics=PhysicStateType.RIGIDBODY, - # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", - # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", - # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", - # ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + 
usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), # RigidObjCfg( # name="lamp", # scale=(1, 1, 1), @@ -482,14 +482,14 @@ def __post_init__(self): # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/result/rubik's_cube.urdf", # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/rubik's_cube/mjcf/rubik's_cube.xml", # ), - # RigidObjCfg( - # name="vase", - # scale=(1, 1, 1), - # physics=PhysicStateType.RIGIDBODY, - # usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", - # urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", - # mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", - # ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), # Trajectory markers RigidObjCfg( name="traj_marker_0", diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml index fcde5d7e7..ab03278a1 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon.yaml @@ -82,7 +82,7 @@ exp_name: "get_started_fttd3_spoon" use_wandb: false checkpoint_path: null run_name: "pick_place.approach_grasp_simple_spoon" # Unique run name -model_dir: "models/spoon_without_Zaxis" # Separate directory for basket layout +model_dir: "models/spoon" # Separate directory for basket layout eval_interval: 5000 save_interval: 5000 video_width: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml new file mode 100644 index 000000000..7da96ae97 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml @@ -0,0 +1,90 @@ +# Base Configuration for FastTD3 Training - Spoon Scene 2 +# Configuration for spoon grasping task with second scene layout + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_spoon2" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 
+learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_spoon2" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_spoon2" # Unique run name for scene 2 +model_dir: "models/spoon2" # Separate directory for scene 2 checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 + diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py index 0b7947d41..e69de29bb 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_bowl.py @@ -1,386 +0,0 @@ -"""Stage 1: Simple Approach and Grasp task for bowl object. - -This task inherits from PickPlaceApproachGraspSimple and customizes it for the bowl object -with specific mesh configurations and saved poses from object_layout.py. -""" - -from __future__ import annotations - -import importlib.util -import os - -import torch -from loguru import logger as log - -from metasim.constants import PhysicStateType -from metasim.scenario.objects import RigidObjCfg -from metasim.scenario.scenario import ScenarioCfg, SimParamCfg -from metasim.task.registry import register_task -from metasim.utils.math import quat_apply -from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -from roboverse_pack.tasks.pick_place.base import PickPlaceBase - - -@register_task("pick_place.approach_grasp_simple_bowl", "pick_place_approach_grasp_simple_bowl") -class PickPlaceApproachGraspSimpleBowl(PickPlaceApproachGraspSimple): - """Simple Approach and Grasp task for bowl object. 
- - This task inherits from PickPlaceApproachGraspSimple and customizes: - - Scenario: Uses bowl mesh, table mesh, and basket from EmbodiedGenData - - Initial states: Loads poses from saved_poses_20251208_bowl_basket.py - """ - - scenario = ScenarioCfg( - objects=[ - # Use actual bowl mesh from EmbodiedGenData (matches object_layout.py) - RigidObjCfg( - name="object", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", - ), - # Use actual table mesh from EmbodiedGenData (matches object_layout.py) - RigidObjCfg( - name="table", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", - fix_base_link=True, - ), - # Basket for visualization (matches object_layout.py) - RigidObjCfg( - name="basket", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", - ), - # Visualization: Trajectory waypoints (5 spheres showing trajectory path) - RigidObjCfg( - name="traj_marker_0", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_1", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_2", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_3", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_4", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - 
fix_base_link=True, - ), - ], - robots=["franka"], - sim_params=SimParamCfg( - dt=0.005, - ), - decimation=4, - ) - max_episode_steps = 200 - - def __init__(self, scenario, device=None): - super().__init__(scenario, device) - - # Grasp point offset for bowl: point to the rim/edge instead of center - # Bowl coordinate frame: (x, y) at center of bowl, z is height (vertical) - # The bowl's origin is at the center/bottom, but we want to grasp at the rim - # Based on mesh analysis: bowl radius varies with height - # - At z=0.1m: radius ~0.099m - # - At z=0.12m: radius ~0.107m - # - At z=0.125m: radius ~0.108m - # Offset format: [x, y, z] in object's local frame - # [0.099, 0.0, 0.1] = 9.9cm radius in x (matches edge at z=0.1m), 0 in y, 10cm upward in z - self.grasp_point_offset_local = torch.tensor([0.099, 0.0, 0.1]) # [x, y, z] in object frame - - # Note: gripper orientation reward is now inherited from base class (approach_grasp.py) - # No need to override reward functions here - - def _get_grasp_point(self, states): - """Get the grasp point for the bowl (rim/edge) instead of center. - - Bowl coordinate frame convention: - - (x, y) plane is at the center of the bowl - - z-axis is the height of the bowl (vertical, pointing up) - - Origin is at the center/bottom of the bowl - - The offset [0.099, 0.0, 0.1] points to the rim edge: - - 0.099m in x direction (radius from center, matches actual edge radius at z=0.1m) - - 0.0m in y direction (grasping at a specific point on rim) - - 0.1m in z direction (upward, at 10cm height from bottom) - - Args: - states: Environment states - - Returns: - grasp_point: (B, 3) grasp point in world coordinates (rim of bowl) - """ - # Get object center position and rotation - box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of bowl - box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - bowl orientation - - # Move offset to correct device and expand to batch size - offset_local = ( - self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) - ) # (B, 3) - - # Transform grasp point offset from object local frame to world frame - # quat_apply rotates a vector by a quaternion - # This accounts for the bowl's rotation in the world - offset_world = quat_apply(box_quat, offset_local) # (B, 3) - - # Add offset to object center to get grasp point (rim position) - grasp_point = box_pos + offset_world # (B, 3) - - return grasp_point - - def step(self, actions): - """Step with delta control and simple gripper control, using bowl rim as grasp point.""" - current_states = self.handler.get_states(mode="tensor") - - # Use grasp point (bowl rim) instead of object center - grasp_point = self._get_grasp_point(current_states) # (B, 3) - gripper_pos, _ = self._get_ee_state(current_states) - - # Calculate 3D Euclidean distance between gripper and grasp point (rim) - gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - - # Calculate z-axis distance (vertical distance) - delta = gripper_pos - grasp_point - dist_z = torch.abs(delta[:, 2]) - - # Apply delta control - delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions - real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - - # Simple gripper control: close when near grasp point (both 3D distance and z-axis condition) - real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist, dist_z) - - # Apply joint2 lift control if grasped - if self.object_grasped is not None and 
self.object_grasped.any() and self.joint2_index is not None: - real_actions = self._apply_joint2_lift_control(real_actions, current_states) - - # Bypass PickPlaceBase.step to avoid its gripper control logic - obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) - self._last_action = real_actions - - # Update grasp state after step (for next step's comparison) - updated_states = self.handler.get_states(mode="tensor") - old_grasped = self.object_grasped.clone() - self.object_grasped = self._compute_grasp_state(updated_states) - - newly_grasped = self.object_grasped & (~old_grasped) - newly_released = (~self.object_grasped) & old_grasped - - if newly_grasped.any() and newly_grasped[0]: - log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_grasped] = True - - if newly_released.any() and newly_released[0]: - log.info(f"[Env 0] Object released! Distance: {gripper_box_dist[0].item():.4f}m") - self._grasp_notified[newly_released] = False - - # Terminate episode if object is released - terminated = terminated | newly_released - - # Track lift state: check if joint2 has been lifted significantly - lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - if self.joint2_index is not None and self.initial_joint_pos is not None: - current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] - initial_joint2 = self.initial_joint_pos[:, self.joint2_index] - # Lift is active if joint2 has moved up significantly (more than 0.1 radians) - lift_active = (current_joint2 - initial_joint2) > 0.1 - - info["grasp_success"] = self.object_grasped - info["lift_active"] = lift_active - info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) - - return obs, reward, terminated, time_out, info - - def _compute_grasp_state(self, states): - """Compute if object is grasped, using bowl rim as grasp point.""" - # Use grasp point (bowl rim) instead of object center - grasp_point = self._get_grasp_point(states) # (B, 3) - gripper_pos, _ = self._get_ee_state(states) - gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - - # Update rolling distance history - if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) - self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) - self._distance_history[:, -1] = gripper_box_dist - - # Object is grasped if distance has been stable (close) for 5 frames - stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) - is_grasping = stable_grasp - - return is_grasping - - def _reward_gripper_approach(self, env_states) -> torch.Tensor: - """Reward for gripper approaching the bowl rim.""" - # Use grasp point (bowl rim) instead of object center - grasp_point = self._get_grasp_point(env_states) # (B, 3) - gripper_pos, _ = self._get_ee_state(env_states) - gripper_box_dist = torch.norm(grasp_point - gripper_pos, dim=-1) - - approach_reward_far = 1 - torch.tanh(gripper_box_dist) - approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) - return approach_reward_far + approach_reward_near - - def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from object_layout.py. 
Loads bowl, table, basket, and trajectory markers - from saved_poses_20251208_bowl_basket.py. - """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251208_bowl_basket.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses, mapping bowl to "object" - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Bowl as the object to pick - "object": saved_poses["objects"]["bowl"], - "table": saved_poses["objects"]["table"], - # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get( - "basket", saved_poses["objects"]["table"] - ), # Fallback to table if basket not found - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"]["traj_marker_0"], - "traj_marker_1": saved_poses["objects"]["traj_marker_1"], - "traj_marker_2": saved_poses["objects"]["traj_marker_2"], - "traj_marker_3": saved_poses["objects"]["traj_marker_3"], - "traj_marker_4": saved_poses["objects"]["traj_marker_4"], - }, - "robots": { - "franka": saved_poses["robots"]["franka"], - }, - } - init.append(env_state) - else: - # Default poses (fallback) - init = [ - { - "objects": { - "object": { - "pos": torch.tensor([0.654277, -0.345737, 0.020000]), - "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), - }, - "table": { - "pos": torch.tensor([0.499529, 0.253941, 0.200000]), - "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), - }, - # Trajectory waypoints (world coordinates) - "traj_marker_0": { - "pos": torch.tensor([0.610000, -0.280000, 0.150000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.600000, -0.190000, 0.220000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.560000, -0.110000, 0.360000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.530000, 0.010000, 0.470000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.510000, 0.130000, 0.460000]), - "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([-0.025, -0.160, 0.018054]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - "dof_pos": { - "panda_finger_joint1": 0.04, - "panda_finger_joint2": 0.04, - "panda_joint1": 0.0, - "panda_joint2": -0.785398, - "panda_joint3": 0.0, - "panda_joint4": -2.356194, - "panda_joint5": 0.0, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] - - return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py index 3d1333314..e69de29bb 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py +++ 
b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool.py @@ -1,354 +0,0 @@ -"""Stage 1: Simple Approach and Grasp task for cutting_tools object. - -This task inherits from PickPlaceApproachGraspSimple and customizes it for the cutting_tools object -with specific mesh configurations and saved poses from object_layout.py. -""" - -from __future__ import annotations - -import importlib.util -import os - -import torch -from loguru import logger as log - -from metasim.constants import PhysicStateType -from metasim.scenario.objects import RigidObjCfg -from metasim.scenario.scenario import ScenarioCfg, SimParamCfg -from metasim.task.registry import register_task -from metasim.utils.math import quat_apply -from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -from roboverse_pack.tasks.pick_place.base import PickPlaceBase - - -@register_task("pick_place.approach_grasp_simple_cuttingtool", "pick_place_approach_grasp_simple_cuttingtool") -class PickPlaceApproachGraspSimpleCuttingTool(PickPlaceApproachGraspSimple): - """Simple Approach and Grasp task for cutting_tools object. - - This task inherits from PickPlaceApproachGraspSimple and customizes: - - Scenario: Uses cutting_tools mesh, table mesh, and basket from EmbodiedGenData - - Initial states: Loads poses from saved_poses_20251210_cuttingtool.py - """ - - scenario = ScenarioCfg( - objects=[ - # Use actual cutting_tools mesh from EmbodiedGenData (matches object_layout.py) - RigidObjCfg( - name="object", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", - ), - # Use actual table mesh from EmbodiedGenData (matches object_layout.py) - RigidObjCfg( - name="table", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", - fix_base_link=True, - ), - # Basket for visualization (matches object_layout.py) - RigidObjCfg( - name="basket", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", - ), - # Visualization: Trajectory waypoints (5 spheres showing trajectory path) - RigidObjCfg( - name="traj_marker_0", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_1", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - 
physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_2", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_3", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - RigidObjCfg( - name="traj_marker_4", - urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", - mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", - usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", - scale=0.2, - physics=PhysicStateType.XFORM, - enabled_gravity=False, - collision_enabled=False, - fix_base_link=True, - ), - ], - robots=["franka"], - sim_params=SimParamCfg( - dt=0.005, - ), - decimation=4, - ) - max_episode_steps = 200 - - def __init__(self, scenario, device=None): - super().__init__(scenario, device) - - # Grasp point offset for cuttingtool: point to the handle center instead of object center - # Cutting tools typically have handles at one end. The offset is in the object's local frame. - # Adjust these values based on your cuttingtool mesh: - # - Negative x typically points toward the handle end (assuming tool extends along x-axis) - # - You may need to adjust based on the actual mesh orientation - # Default: [-0.08, 0.0, 0.0] means 8cm along negative x-axis (toward handle) - # If your tool is oriented differently, adjust the offset accordingly - self.grasp_point_offset_local = torch.tensor([-0.08, 0.0, 0.0]) # [x, y, z] in object frame - - def _get_grasp_point(self, states): - """Get the grasp point for the cuttingtool (handle center) instead of object center. 
- - Args: - states: Environment states - - Returns: - grasp_point: (B, 3) grasp point in world coordinates (handle center) - """ - # Get object center position and rotation - box_pos = states.objects["object"].root_state[:, 0:3] # (B, 3) - center of object - box_quat = states.objects["object"].root_state[:, 3:7] # (B, 4) wxyz - object orientation - - # Move offset to correct device and expand to batch size - offset_local = ( - self.grasp_point_offset_local.to(box_pos.device).unsqueeze(0).expand(box_pos.shape[0], -1) - ) # (B, 3) - - # Transform grasp point offset from object local frame to world frame - # quat_apply rotates a vector by a quaternion - # This accounts for the object's rotation in the world - offset_world = quat_apply(box_quat, offset_local) # (B, 3) - - # Add offset to object center to get grasp point (handle position) - grasp_point = box_pos + offset_world # (B, 3) - - return grasp_point - - def step(self, actions): - """Step with delta control and simple gripper control, using handle center as grasp point.""" - current_states = self.handler.get_states(mode="tensor") - - # Use grasp point (handle center) instead of object center - grasp_point = self._get_grasp_point(current_states) # (B, 3) - gripper_pos, _ = self._get_ee_state(current_states) - - # Calculate 3D Euclidean distance between gripper and grasp point (handle) - gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - - # Apply delta control - delta_actions = actions * self._action_scale - new_actions = self._last_action + delta_actions - real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - - # Simple gripper control: close when near grasp point (ONLY 3D distance, NO z-axis condition) - real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist, dist_z=None) - - # Apply joint2 lift control if grasped - if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: - real_actions = self._apply_joint2_lift_control(real_actions, current_states) - - # Bypass PickPlaceBase.step to avoid its gripper control logic - obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) - self._last_action = real_actions - - # Update grasp state after step - updated_states = self.handler.get_states(mode="tensor") - old_grasped = self.object_grasped.clone() - self.object_grasped = self._compute_grasp_state(updated_states) - - newly_grasped = self.object_grasped & (~old_grasped) - newly_released = (~self.object_grasped) & old_grasped - - if newly_grasped.any() and newly_grasped[0]: - log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") - - if newly_released.any() and newly_released[0]: - log.info(f"[Env 0] Object released! 
Distance: {gripper_box_dist[0].item():.4f}m") - - # Terminate episode if object is released - terminated = terminated | newly_released - - # Track lift state: check if joint2 has been lifted significantly - lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) - if self.joint2_index is not None and self.initial_joint_pos is not None: - current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] - initial_joint2 = self.initial_joint_pos[:, self.joint2_index] - # Lift is active if joint2 has moved up significantly (more than 0.1 radians) - lift_active = (current_joint2 - initial_joint2) > 0.1 - - info["grasp_success"] = self.object_grasped - info["lift_active"] = lift_active - info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) - - return obs, reward, terminated, time_out, info - - def _compute_grasp_state(self, states): - """Compute if object is grasped, using handle center as grasp point.""" - # Use grasp point (handle center) instead of object center - grasp_point = self._get_grasp_point(states) # (B, 3) - gripper_pos, _ = self._get_ee_state(states) - gripper_box_dist = torch.norm(gripper_pos - grasp_point, dim=-1) - - # Update rolling distance history - if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: - self._distance_history = torch.full( - (self.num_envs, self.GRASP_HISTORY_WINDOW), - float("inf"), - device=self.device, - ) - self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) - self._distance_history[:, -1] = gripper_box_dist - - # Object is grasped if distance has been stable (close) for 5 frames - stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) - is_grasping = stable_grasp - - return is_grasping - - def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from object_layout.py. Loads cutting_tools, table, basket, and trajectory markers - from saved_poses_20251210_cuttingtool.py. 
- """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251210_cuttingtool.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from object_layout.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Cutting_tools as the object to pick - "object": saved_poses["objects"]["cutting_tools"], - "table": saved_poses["objects"]["table"], - # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get( - "basket", saved_poses["objects"]["table"] - ), # Fallback to table if basket not found - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"]["traj_marker_0"], - "traj_marker_1": saved_poses["objects"]["traj_marker_1"], - "traj_marker_2": saved_poses["objects"]["traj_marker_2"], - "traj_marker_3": saved_poses["objects"]["traj_marker_3"], - "traj_marker_4": saved_poses["objects"]["traj_marker_4"], - }, - "robots": { - "franka": saved_poses["robots"]["franka"], - }, - } - init.append(env_state) - else: - # Default poses (fallback) - init = [ - { - "objects": { - "object": { - "pos": torch.tensor([0.654277, -0.345737, 0.020000]), - "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), - }, - "table": { - "pos": torch.tensor([0.499529, 0.253941, 0.200000]), - "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), - }, - # Trajectory waypoints (world coordinates) - "traj_marker_0": { - "pos": torch.tensor([0.610000, -0.280000, 0.150000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.600000, -0.190000, 0.220000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.560000, -0.110000, 0.360000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.530000, 0.010000, 0.470000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.510000, 0.130000, 0.460000]), - "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([-0.025, -0.160, 0.018054]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - "dof_pos": { - "panda_finger_joint1": 0.04, - "panda_finger_joint2": 0.04, - "panda_joint1": 0.0, - "panda_joint2": -0.785398, - "panda_joint3": 0.0, - "panda_joint4": -2.356194, - "panda_joint5": 0.0, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] - - return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py index 5b6cfdb14..5eb24832d 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py @@ -1,7 +1,7 @@ """Stage 1: Simple Approach and Grasp 
task for spoon object. This task inherits from PickPlaceApproachGraspSimple and customizes it for the spoon object -with specific mesh configurations and saved poses from object_layout.py. +with specific mesh configurations and saved poses from saved_poses_20251212_spoon.py. """ from __future__ import annotations @@ -25,21 +25,11 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): This task inherits from PickPlaceApproachGraspSimple and customizes: - Scenario: Uses spoon mesh, table mesh, and basket from EmbodiedGenData - - Initial states: Loads poses from saved_poses_20251206_spoon_basket.py + - Initial states: Loads poses from saved_poses_20251212_spoon.py """ scenario = ScenarioCfg( objects=[ - # Use actual spoon mesh from EmbodiedGenData (matches object_layout.py) - RigidObjCfg( - name="object", - scale=(1, 1, 1), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", - ), - # Use actual table mesh from EmbodiedGenData (matches object_layout.py) RigidObjCfg( name="table", scale=(1, 1, 1), @@ -47,9 +37,7 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", - fix_base_link=True, ), - # Basket for visualization (matches object_layout.py) RigidObjCfg( name="basket", scale=(1, 1, 1), @@ -58,6 +46,47 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual spoon mesh from EmbodiedGenData as the object to pick + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + 
usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), # Visualization: Trajectory waypoints (5 spheres showing trajectory path) RigidObjCfg( name="traj_marker_0", @@ -126,15 +155,14 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): def _get_initial_states(self) -> list[dict] | None: """Get initial states for all environments. - Uses saved poses from object_layout.py. Loads spoon, table, basket, and trajectory markers - from saved_poses_20251206_spoon_basket.py. + Uses saved poses from saved_poses_20251212_spoon.py. Loads spoon, table, basket, and trajectory markers. """ # Add path to saved poses saved_poses_path = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), "get_started", "output", - "saved_poses_20251206_spoon_basket.py", + "saved_poses_20251212_spoon.py", ) if os.path.exists(saved_poses_path): # Load saved poses dynamically @@ -148,77 +176,236 @@ def _get_initial_states(self) -> list[dict] | None: saved_poses = None if saved_poses is not None: - # Use saved poses from object_layout.py + # Use saved poses from saved_poses_20251212_spoon.py init = [] for _ in range(self.num_envs): env_state = { "objects": { # Spoon as the object to pick - "object": saved_poses["objects"]["spoon"], - "table": saved_poses["objects"]["table"], - # Basket for visualization (if present in saved poses) - "basket": saved_poses["objects"].get( - "basket", saved_poses["objects"]["table"] - ), # Fallback to table if basket not found + "object": saved_poses["objects"].get("spoon"), + "table": saved_poses["objects"].get("table"), + "basket": saved_poses["objects"].get("basket"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "mug": saved_poses["objects"].get("mug"), + "book": saved_poses["objects"].get("book"), + "vase": saved_poses["objects"].get("vase"), # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"]["traj_marker_0"], - "traj_marker_1": saved_poses["objects"]["traj_marker_1"], - "traj_marker_2": saved_poses["objects"]["traj_marker_2"], - "traj_marker_3": saved_poses["objects"]["traj_marker_3"], - "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), }, "robots": { "franka": saved_poses["robots"]["franka"], }, } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} init.append(env_state) else: - # Default poses (fallback) + # Default poses (fallback) - using poses from saved_poses_20251212_spoon.py as hardcoded values init = [ { "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + 
"basket": { + "pos": torch.tensor([0.210000, 0.250000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.560000, -0.730000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, "object": { - "pos": torch.tensor([0.654277, -0.345737, 0.020000]), - "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + "pos": torch.tensor([0.200000, -0.590000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), }, - "table": { - "pos": torch.tensor([0.499529, 0.253941, 0.200000]), - "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + "mug": { + "pos": torch.tensor([0.690000, -0.550000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.700000, 0.280000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.050000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, - # Trajectory waypoints (world coordinates) "traj_marker_0": { - "pos": torch.tensor([0.610000, -0.280000, 0.150000]), + "pos": torch.tensor([0.220000, -0.550000, 0.880000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_1": { - "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "pos": torch.tensor([0.220000, -0.310000, 0.900000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_2": { - "pos": torch.tensor([0.560000, -0.110000, 0.360000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + "pos": torch.tensor([0.210000, -0.250000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_3": { - "pos": torch.tensor([0.530000, 0.010000, 0.470000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.210000, 0.040000, 1.250000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), }, "traj_marker_4": { - "pos": torch.tensor([0.510000, 0.130000, 0.460000]), - "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + "pos": torch.tensor([0.190000, 0.250000, 1.010000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, }, "robots": { "franka": { - "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "pos": torch.tensor([0.560000, -0.230001, 0.800000]), + "rot": torch.tensor([0.120502, -0.000001, -0.000001, 0.992712]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init + + +@register_task("pick_place.approach_grasp_simple_spoon2", "pick_place_approach_grasp_simple_spoon2") +class PickPlaceApproachGraspSimpleSpoon2(PickPlaceApproachGraspSimpleSpoon): + """Simple Approach and Grasp task for spoon object - Scene 2. + + This task inherits from PickPlaceApproachGraspSimpleSpoon and uses poses from + saved_poses_20251212_spoon2.py for a different scene layout. + """ + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from saved_poses_20251212_spoon2.py. Loads spoon, table, basket, and trajectory markers. 
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251212_spoon2.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from saved_poses_20251212_spoon2.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Spoon as the object to pick + "object": saved_poses["objects"].get("spoon"), + "table": saved_poses["objects"].get("table"), + "basket": saved_poses["objects"].get("basket"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "mug": saved_poses["objects"].get("mug"), + "book": saved_poses["objects"].get("book"), + "vase": saved_poses["objects"].get("vase"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from saved_poses_20251212_spoon2.py as hardcoded values + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.640000, -0.620000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.290000, 0.370000, 0.820000]), + "rot": torch.tensor([0.640996, -0.000000, -0.000000, 0.767544]), + }, + "object": { + "pos": torch.tensor([0.140000, -0.370000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), + }, + "mug": { + "pos": torch.tensor([0.520000, 0.250000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.240000, 0.160000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.270000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.150000, -0.350000, 0.860000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.240000, -0.460000, 0.940000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.270000, -0.610000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.380000, -0.640000, 1.310000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": 
torch.tensor([0.670000, -0.620000, 1.020000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.550000, -0.120000, 0.800000]), + "rot": torch.tensor([-0.393287, -0.000001, -0.000000, 0.919414]), "dof_pos": { - "panda_finger_joint1": 0.04, - "panda_finger_joint2": 0.04, - "panda_joint1": 0.0, + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, "panda_joint2": -0.785398, - "panda_joint3": 0.0, + "panda_joint3": 0.000000, "panda_joint4": -2.356194, - "panda_joint5": 0.0, + "panda_joint5": 0.000000, "panda_joint6": 1.570796, "panda_joint7": 0.785398, }, @@ -229,3 +416,4 @@ def _get_initial_states(self) -> list[dict] | None: ] return init + From 671aff4c261b781fea9d3dfa801a1e5d3860a192 Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Sat, 13 Dec 2025 18:14:26 -0800 Subject: [PATCH 16/37] Apply pre-commit fixes and add new pick_place_spoon2 config --- .gitignore | 1 + .../fast_td3/configs/pick_place_spoon2.yaml | 90 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/.gitignore b/.gitignore index 1a246f924..a9da1f783 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,4 @@ id_rsa* *.pkl *.pt output +.devcontainer/devcontainer.json diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml index 7da96ae97..7b32c7179 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml @@ -88,3 +88,93 @@ save_interval: 5000 video_width: 1024 video_height: 1024 +# Base Configuration for FastTD3 Training - Spoon Scene 2 +# Configuration for spoon grasping task with second scene layout + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_spoon2" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# 
------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_spoon2" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_spoon2" # Unique run name for scene 2 +model_dir: "models/spoon2" # Separate directory for scene 2 checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 + From 6f3913ea5c769a0f2dfb63781b77779a87e23075 Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Sat, 13 Dec 2025 18:15:52 -0800 Subject: [PATCH 17/37] Local devcontainer config (ignored from push) --- .devcontainer/devcontainer.json | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4820ba2f1..cb3c3b638 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,21 +1,8 @@ { - "name": "RoboVerse Development Container", - "privileged": true, - "dockerComposeFile": "../docker-compose.yml", - "service": "metasim", - "workspaceFolder": "/home/${localEnv:USER}/RoboVerse", - "customizations": { - "vscode": { - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance", // for vscode user - "anysphere.cursorpyright", // for cursor user - "charliermarsh.ruff" - ], - "settings": { - "terminal.integrated.defaultProfile.linux": "bash", - "terminal.integrated.profiles.linux": { "bash": { "path": "/bin/bash" } } - } - } - } + "name": "RoboVerse DevContainer", + "image": "ubuntu:20.04", + "mounts": [ + "source=${localEnv:HOME}/.ssh,target=/root/.ssh,type=bind" + ], + "postCreateCommand": "apt update && apt install -y git openssh-client && git config --global user.name 'Boqi Zhao' && git config --global user.email 'your_email@example.com'" } From e89acb48a9c8ee9296acaa8a013dc55376085ed4 Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Sat, 13 Dec 2025 19:10:15 -0800 Subject: [PATCH 18/37] Fix duplicated sim key in pick_place_spoon2.yaml --- .../fast_td3/configs/pick_place_spoon2.yaml | 91 ------------------- 1 file changed, 91 deletions(-) diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml index 7b32c7179..59c38dd19 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_spoon2.yaml @@ -87,94 +87,3 @@ eval_interval: 5000 save_interval: 5000 video_width: 1024 video_height: 1024 - -# Base Configuration for FastTD3 Training - Spoon Scene 2 -# Configuration for spoon grasping task with second scene layout - -# ------------------------------------------------------------------------------- -# Environment -# 
------------------------------------------------------------------------------- -sim: "isaacgym" -robots: ["franka"] -task: "pick_place.approach_grasp_simple_spoon2" -decimation: 4 -train_or_eval: "train" -headless: False - -# ------------------------------------------------------------------------------- -# Seeds & Device -# ------------------------------------------------------------------------------- -seed: 1 -cuda: true -torch_deterministic: true -device_rank: 0 - -# ------------------------------------------------------------------------------- -# Rollout & Timesteps -# ------------------------------------------------------------------------------- -num_envs: 400 -num_eval_envs: 400 -total_timesteps: 2000000 -learning_starts: 10 -num_steps: 1 - -# ------------------------------------------------------------------------------- -# Replay, Batching, Discounting -# ------------------------------------------------------------------------------- -buffer_size: 20480 -batch_size: 32768 -gamma: 0.99 -tau: 0.1 - -# ------------------------------------------------------------------------------- -# Update Schedule -# ------------------------------------------------------------------------------- -policy_frequency: 2 -num_updates: 5 -# ------------------------------------------------------------------------------- -# Optimizer & Network -# ------------------------------------------------------------------------------- -critic_learning_rate: 0.0003 -actor_learning_rate: 0.0003 -weight_decay: 0.1 -critic_hidden_dim: 512 -actor_hidden_dim: 256 -init_scale: 0.01 -num_atoms: 101 - -# ------------------------------------------------------------------------------- -# Value Distribution & Exploration -# ------------------------------------------------------------------------------- -v_min: 0 -v_max: 600.0 -policy_noise: 0.001 -std_min: 0.001 -std_max: 0.4 -noise_clip: 0.5 - -# ------------------------------------------------------------------------------- -# Algorithm Flags -# ------------------------------------------------------------------------------- -use_cdq: true -compile: true -obs_normalization: true -max_grad_norm: 0.0 -amp: true -amp_dtype: "fp16" -disable_bootstrap: false -measure_burnin: 3 - -# ------------------------------------------------------------------------------- -# Logging & Checkpointing -# ------------------------------------------------------------------------------- -wandb_project: "get_started_fttd3" -exp_name: "get_started_fttd3_spoon2" -use_wandb: false -checkpoint_path: null -run_name: "pick_place.approach_grasp_simple_spoon2" # Unique run name for scene 2 -model_dir: "models/spoon2" # Separate directory for scene 2 checkpoints -eval_interval: 5000 -save_interval: 5000 -video_width: 1024 -video_height: 1024 - From 5ced66fd02637f85fff3d54305191e8ec571250a Mon Sep 17 00:00:00 2001 From: Anchor1021 Date: Sat, 13 Dec 2025 19:22:27 -0800 Subject: [PATCH 19/37] Trigger CI From dd932c5e9942e479fc2c2d57c903963398f8696b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Dec 2025 03:22:48 +0000 Subject: [PATCH 20/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- roboverse_pack/tasks/pick_place/approach_grasp_spoon.py | 1 - scripts/advanced/collect_demo.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py index 
5eb24832d..ee9f42ced 100644
--- a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py
@@ -416,4 +416,3 @@ def _get_initial_states(self) -> list[dict] | None:
         ]

         return init
-
diff --git a/scripts/advanced/collect_demo.py b/scripts/advanced/collect_demo.py
index cccf56f1a..8bf2e7dad 100644
--- a/scripts/advanced/collect_demo.py
+++ b/scripts/advanced/collect_demo.py
@@ -688,4 +688,4 @@ def main():


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From e96b4a22ebe69a3c3dca5b989820d8ac1af189d5 Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Sun, 14 Dec 2025 15:11:42 -0800
Subject: [PATCH 21/37] Update the positions of the banana and screwdriver

---
 .../tasks/pick_place/approach_grasp_banana.py | 48 +++++++++++--------
 .../pick_place/approach_grasp_screwdriver.py  | 30 ++++++------
 2 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
index a8e6ed6c4..782653ac6 100644
--- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
@@ -92,13 +92,21 @@ class PickPlaceApproachGraspBanana(PickPlaceBase):
             mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml",
         ),
         RigidObjCfg(
-            name="screw_driver",
-            scale=(1.5, 1.5, 1.5),
+            name="cutting_tools",
+            scale=(1, 1, 1),
             physics=PhysicStateType.RIGIDBODY,
-            usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd",
-            urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf",
-            mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml",
+            usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd",
+            urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf",
+            mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml",
         ),
+        # RigidObjCfg(
+        #     name="screw_driver",
+        #     scale=(1.5, 1.5, 1.5),
+        #     physics=PhysicStateType.RIGIDBODY,
+        #     usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd",
+        #     urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf",
+        #     mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml",
+        # ),
         RigidObjCfg(
             name="spoon",
             scale=(1, 1, 1),
@@ -193,7 +201,7 @@ def __init__(self, scenario, device=None):
         self.reward_weights = [
             self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"],
             self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"],
-            0.1,  # weight for keeping the gripper pointing downward
+            0.0,  # weight for keeping the gripper pointing downward
         ]

         # Get config values
@@ -412,54 +420,54 @@ def _get_initial_states(self) -> list[dict] | None:
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "lamp": {
-                        "pos": torch.tensor([0.680000, 0.310000, 1.050000]),
+                        "pos": torch.tensor([0.610000, 0.200000, 1.050000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "basket": {
-                        "pos": torch.tensor([0.220000, 0.200000, 0.955000]),
+                        "pos": torch.tensor([0.610000, -0.300000, 0.825000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "bowl": {
-                        "pos": torch.tensor([0.620000, -0.080000, 0.863000]),
+                        "pos": torch.tensor([0.350000, 0.250000, 0.863000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
-                    "screw_driver": {
-                        "pos": torch.tensor([0.530000, -0.410000, 0.811000]),
-                        "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]),
+                    "cutting_tools": {
+                        "pos": torch.tensor([0.180000, -0.070000, 0.820000]),
+                        "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]),
                     },
                     "spoon": {
                         "pos": torch.tensor([0.530000, -0.690000, 0.850000]),
                         "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]),
                     },
                     "object": {
-                        "pos": torch.tensor([0.280000, -0.280000, 0.825000]),
-                        "rot": torch.tensor([0.889292, -0.000000, 0.000001, -0.457338]),
+                        "pos": torch.tensor([0.280000, -0.580000, 0.825000]),
+                        "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "traj_marker_0": {
-                        "pos": torch.tensor([0.260000, -0.240000, 0.850000]),
+                        "pos": torch.tensor([0.280000, -0.540000, 0.850000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "traj_marker_1": {
-                        "pos": torch.tensor([0.270000, -0.190000, 0.970000]),
+                        "pos": torch.tensor([0.320000, -0.490000, 0.910000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "traj_marker_2": {
-                        "pos": torch.tensor([0.260000, -0.140000, 1.160000]),
+                        "pos": torch.tensor([0.330000, -0.430000, 1.110000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "traj_marker_3": {
-                        "pos": torch.tensor([0.250000, -0.030000, 1.240000]),
+                        "pos": torch.tensor([0.360000, -0.350000, 1.210000]),
                         "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]),
                     },
                     "traj_marker_4": {
-                        "pos": torch.tensor([0.310000, 0.190000, 1.160000]),
+                        "pos": torch.tensor([0.600000, -0.310000, 1.190000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                 },
                 "robots": {
                     "franka": {
                         "pos": torch.tensor([0.800000, -0.800000, 0.780000]),
-                        "rot": torch.tensor([0.475731, -0.000000, -0.000001, 0.879590]),
+                        "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813415]),
                         "dof_pos": {
                             "panda_finger_joint1": 0.040000,
                             "panda_finger_joint2": 0.040000,
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py
index d72a14d50..2ffc4ef22 100644
--- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py
@@ -193,7 +193,7 @@ def __init__(self, scenario, device=None):
         self.reward_weights = [
             self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"],
             self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"],
-            0.1,  # weight for keeping the gripper pointing downward
+            0.0,  # weight for keeping the gripper pointing downward
         ]

         # Get config values
@@ -416,7 +416,7 @@ def _get_initial_states(self) -> list[dict] | None:
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "basket": {
-                        "pos": torch.tensor([0.280000, 0.130000, 0.825000]),
+                        "pos": torch.tensor([0.240000, -0.440000, 0.925000]),
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "bowl": {
@@ -424,42 +424,42 @@ def _get_initial_states(self) -> list[dict] | None:
                         "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
                     },
                     "cutting_tools": {
-                        "pos": torch.tensor([0.640000, -0.320000, 0.820000]),
+                        "pos": torch.tensor([0.230000, 0.090000, 0.820000]),
"rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), }, "object": { - "pos": torch.tensor([0.320000, -0.340000, 0.811000]), - "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + "pos": torch.tensor([0.590000, -0.360000, 0.811000]), + "rot": torch.tensor([-0.824810, -0.390085, -0.023230, 0.408623]), }, "spoon": { - "pos": torch.tensor([0.530000, -0.690000, 0.850000]), - "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + "pos": torch.tensor([0.470000, -0.710000, 0.830000]), + "rot": torch.tensor([0.928125, -0.061844, -0.058501, -0.362397]), }, "traj_marker_0": { - "pos": torch.tensor([0.350000, -0.310000, 0.870000]), - "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + "pos": torch.tensor([0.600000, -0.330000, 0.840000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_1": { - "pos": torch.tensor([0.330000, -0.290000, 0.980000]), + "pos": torch.tensor([0.560000, -0.400000, 1.010000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, "traj_marker_2": { - "pos": torch.tensor([0.340000, -0.200000, 1.090000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "pos": torch.tensor([0.530000, -0.400000, 1.140000]), + "rot": torch.tensor([0.962425, -0.271547, 0.000000, 0.000000]), }, "traj_marker_3": { - "pos": torch.tensor([0.330000, -0.040000, 1.180000]), + "pos": torch.tensor([0.430000, -0.340000, 1.160000]), "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), }, "traj_marker_4": { - "pos": torch.tensor([0.310000, 0.150000, 1.160000]), + "pos": torch.tensor([0.260000, -0.400000, 1.170000]), "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), }, }, "robots": { "franka": { "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813414]), "dof_pos": { "panda_finger_joint1": 0.040000, "panda_finger_joint2": 0.040000, From 92912bec5194e64e98314d4c19be617bc8c830e5 Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Sun, 14 Dec 2025 16:32:41 -0800 Subject: [PATCH 22/37] add approach_grasp.py --- .../fast_td3/configs/pick_place_banana.yaml | 2 +- .../tasks/pick_place/approach_grasp.py | 421 ++++++++++++++++++ 2 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml index 443d9dcd7..c4c2b9d7e 100644 --- a/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana.yaml @@ -6,7 +6,7 @@ # ------------------------------------------------------------------------------- sim: "isaacgym" robots: ["franka"] -task: "pick_place.approach_grasp_banana" +task: "pick_place.approach_grasp_simple_banana" decimation: 4 train_or_eval: "train" headless: False diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py new file mode 100644 index 000000000..4a3b0ff02 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp.py @@ -0,0 +1,421 @@ +"""Stage 1: Simple Approach and Grasp task with gripper control. + +This task focuses on learning to approach the object, grasp it with gripper, and lift it. +Simple gripper control: close when near the object. 
+""" + +from __future__ import annotations + +from copy import deepcopy + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import PrimitiveCubeCfg, RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from metasim.utils.math import matrix_from_quat +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +@register_task("pick_place.approach_grasp_simple", "pick_place_approach_grasp_simple") +class PickPlaceApproachGraspSimple(PickPlaceBase): + """Simple Approach and Grasp task with gripper control. + + This task focuses on: + - Approaching the object + - Grasping the object with simple gripper control (close when near) + + Success condition: Object is grasped (reward given when entering grasp state). + Episode terminates if object is released. + """ + + GRASP_DISTANCE_THRESHOLD = 0.02 # Distance threshold for both grasp check and gripper closing + GRASP_HISTORY_WINDOW = 5 # Number of frames to check for stable grasp + + # Joint2 lift parameters (for franka: panda_joint2) + JOINT2_LIFT_OFFSET = 0.5 # Amount to lift joint2 when grasped (positive = lift up) + JOINT2_LIFT_KP = 0.2 # Proportional gain for joint2 lift control + JOINT2_LIFT_MAX_DELTA = 0.3 # Maximum change per step + + DEFAULT_CONFIG_SIMPLE = deepcopy(DEFAULT_CONFIG) + DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"].update({ + "gripper_approach": 0.5, + "grasp_reward": 4.0, + "gripper_downward_alignment": 0.1, + }) + DEFAULT_CONFIG_SIMPLE["grasp_config"] = { + "grasp_check_distance": GRASP_DISTANCE_THRESHOLD, + "gripper_close_distance": GRASP_DISTANCE_THRESHOLD, + } + + scenario = ScenarioCfg( + objects=[ + PrimitiveCubeCfg( + name="object", + size=(0.04, 0.04, 0.04), + mass=0.02, + physics=PhysicStateType.RIGIDBODY, + color=(1.0, 0.0, 0.0), + ), + PrimitiveCubeCfg( + name="table", + size=(0.2, 0.3, 0.4), + mass=10.0, + physics=PhysicStateType.RIGIDBODY, + color=(0.8, 0.6, 0.4), + fix_base_link=True, + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + 
collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Placeholders needed during super().__init__ (reset may be called there) + self.object_grasped = None + self.gripper_joint_indices = [0, 1] # panda_finger_joint1, panda_finger_joint2 + self._grasp_notified = None + self._distance_history = None # History buffer for stable grasp check + self.joint2_name = "panda_joint2" + self.joint2_index = None + self.initial_joint_pos = None + + super().__init__(scenario, device) + + # Override reward functions for this task + self.reward_functions = [ + self._reward_gripper_approach, + self._reward_grasp, + self._reward_gripper_downward_alignment, + ] + self.reward_weights = [ + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_approach"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["grasp_reward"], + self.DEFAULT_CONFIG_SIMPLE["reward_config"]["scales"]["gripper_downward_alignment"], + ] + + # Get config values + grasp_config = self.DEFAULT_CONFIG_SIMPLE["grasp_config"] + self.grasp_check_distance = grasp_config["grasp_check_distance"] + self.gripper_close_distance = grasp_config["gripper_close_distance"] + + # Initialize tracking buffers + self.object_grasped = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + + # Find joint2 index + joint_names = self.handler.get_joint_names(self.robot_name, sort=True) + if self.joint2_name in joint_names: + self.joint2_index = joint_names.index(self.joint2_name) + else: + log.warning(f"Joint {self.joint2_name} not found, joint2 lift disabled") + + def reset(self, env_ids=None): + """Reset environment and tracking variables.""" + obs, info = super().reset(env_ids=env_ids) + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + + # Reset grasp tracking + self.object_grasped[env_ids_tensor] = False + if self._grasp_notified is None or self._grasp_notified.shape[0] != self.num_envs: + self._grasp_notified = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + else: + self._grasp_notified[env_ids_tensor] = False + self._distance_history[env_ids_tensor] = float("inf") + + # Store initial joint positions if not already stored + if self.initial_joint_pos is None: + states = self.handler.get_states(mode="tensor") + self.initial_joint_pos = states.robots[self.robot_name].joint_pos.clone() + + return obs, info + + def step(self, actions): + """Step with delta control and simple gripper control.""" + current_states = self.handler.get_states(mode="tensor") + box_pos = 
current_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(current_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Apply delta control + delta_actions = actions * self._action_scale + new_actions = self._last_action + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Simple gripper control: close when near object + real_actions = self._apply_simple_gripper_control(real_actions, gripper_box_dist) + + # Apply joint2 lift control if grasped + if self.object_grasped is not None and self.object_grasped.any() and self.joint2_index is not None: + real_actions = self._apply_joint2_lift_control(real_actions, current_states) + + # Bypass PickPlaceBase.step to avoid its gripper control logic + # Call RLTaskEnv.step directly + # Note: reward functions will be called inside super().step() + # and they will compute newly_grasped by comparing current state with self.object_grasped + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + # Update grasp state after step (for next step's comparison) + updated_states = self.handler.get_states(mode="tensor") + old_grasped = self.object_grasped.clone() + self.object_grasped = self._compute_grasp_state(updated_states) + + newly_grasped = self.object_grasped & (~old_grasped) + newly_released = (~self.object_grasped) & old_grasped + + if newly_grasped.any() and newly_grasped[0]: + log.info(f"[Env 0] Object grasped! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_grasped] = True + + if newly_released.any() and newly_released[0]: + log.info(f"[Env 0] Object released! Distance: {gripper_box_dist[0].item():.4f}m") + self._grasp_notified[newly_released] = False + + # Terminate episode if object is released + terminated = terminated | newly_released + + # Track lift state: check if joint2 has been lifted significantly + lift_active = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device) + if self.joint2_index is not None and self.initial_joint_pos is not None: + current_joint2 = updated_states.robots[self.robot_name].joint_pos[:, self.joint2_index] + initial_joint2 = self.initial_joint_pos[:, self.joint2_index] + # Lift is active if joint2 has moved up significantly (more than 0.1 radians) + lift_active = (current_joint2 - initial_joint2) > 0.1 + + info["grasp_success"] = self.object_grasped + info["lift_active"] = lift_active + info["stage"] = torch.full((self.num_envs,), 1, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info + + def _apply_simple_gripper_control(self, actions, gripper_box_dist): + """Simple gripper control: close when near object.""" + # Close gripper when close to object + gripper_close = gripper_box_dist < self.gripper_close_distance + gripper_value_close = torch.tensor(0.0, device=self.device, dtype=actions.dtype) # Closed + gripper_value_open = torch.tensor(0.04, device=self.device, dtype=actions.dtype) # Open + + # Set gripper joints + for gripper_idx in self.gripper_joint_indices: + actions[:, gripper_idx] = torch.where( + gripper_close, + gripper_value_close, + gripper_value_open, + ) + + return actions + + def _apply_joint2_lift_control(self, actions, current_states): + """Apply joint2 lift control when object is grasped.""" + if self.initial_joint_pos is None: + self.initial_joint_pos = current_states.robots[self.robot_name].joint_pos.clone() + + joint_pos = 
current_states.robots[self.robot_name].joint_pos + joint2_idx = self.joint2_index + + # Target position: initial position + lift offset (positive offset lifts up) + target_lift = self.initial_joint_pos[:, joint2_idx] + self.JOINT2_LIFT_OFFSET + joint_error = target_lift - joint_pos[:, joint2_idx] + + # Apply proportional control with max delta limit + desired = joint_pos[:, joint2_idx] + self.JOINT2_LIFT_KP * joint_error + delta = torch.clamp( + desired - joint_pos[:, joint2_idx], + -self.JOINT2_LIFT_MAX_DELTA, + self.JOINT2_LIFT_MAX_DELTA, + ) + joint2_value = torch.clamp( + joint_pos[:, joint2_idx] + delta, + self._action_low[joint2_idx], + self._action_high[joint2_idx], + ) + + # Apply lift control only to environments where object is grasped + actions[self.object_grasped, joint2_idx] = joint2_value[self.object_grasped] + + return actions + + def _compute_grasp_state(self, states): + """Compute if object is grasped (requires 5 stable frames based on distance only).""" + box_pos = states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + + # Update rolling distance history + if self._distance_history is None or self._distance_history.shape[0] != self.num_envs: + self._distance_history = torch.full( + (self.num_envs, self.GRASP_HISTORY_WINDOW), + float("inf"), + device=self.device, + ) + self._distance_history = torch.roll(self._distance_history, shifts=-1, dims=1) + self._distance_history[:, -1] = gripper_box_dist + + # Object is grasped if distance has been stable (close) for 5 frames + stable_grasp = (self._distance_history < self.grasp_check_distance).all(dim=1) + is_grasping = stable_grasp + + return is_grasping + + def _reward_gripper_approach(self, env_states) -> torch.Tensor: + """Reward for gripper approaching the box.""" + box_pos = env_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(env_states) + gripper_box_dist = torch.norm(box_pos - gripper_pos, dim=-1) + + approach_reward_far = 1 - torch.tanh(gripper_box_dist) + approach_reward_near = 1 - torch.tanh(gripper_box_dist * 10) + return approach_reward_far + approach_reward_near + + def _reward_grasp(self, env_states) -> torch.Tensor: + """Reward for maintaining grasp state (continuous reward while grasped).""" + # Use cached grasp state (computed in step method) + return self.object_grasped.float() + + def _reward_gripper_downward_alignment(self, env_states) -> torch.Tensor: + """Encourage the gripper tool frame to face downward (z-axis aligned with world -Z).""" + _, gripper_quat = self._get_ee_state(env_states) + gripper_rot = matrix_from_quat(gripper_quat) # (B, 3, 3) + gripper_z_world = gripper_rot[:, :, 2] # gripper local z-axis in world frame + + down = torch.tensor([0.0, 0.0, -1.0], device=self.device, dtype=gripper_z_world.dtype) + alignment = (gripper_z_world * down).sum(dim=-1).clamp(-1.0, 1.0) # cosine of angle to -Z + # Map cosine (1 best, -1 worst) to [0,1] reward + return (alignment + 1.0) / 2.0 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments.""" + init = [ + { + "objects": { + "object": { + "pos": torch.tensor([0.654277, -0.345737, 0.020000]), + "rot": torch.tensor([0.706448, -0.031607, 0.706347, 0.031698]), + }, + "table": { + "pos": torch.tensor([0.499529, 0.253941, 0.200000]), + "rot": torch.tensor([0.999067, -0.000006, 0.000009, 0.043198]), + }, + # Trajectory waypoints (world coordinates) + "traj_marker_0": { + "pos": 
torch.tensor([0.610000, -0.280000, 0.150000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.600000, -0.190000, 0.220000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.560000, -0.110000, 0.360000]), + "rot": torch.tensor([0.998750, 0.000000, 0.049979, -0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.530000, 0.010000, 0.470000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.510000, 0.130000, 0.460000]), + "rot": torch.tensor([0.984726, 0.000000, 0.174108, -0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([-0.025, -0.160, 0.018054]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.04, + "panda_finger_joint2": 0.04, + "panda_joint1": 0.0, + "panda_joint2": -0.785398, + "panda_joint3": 0.0, + "panda_joint4": -2.356194, + "panda_joint5": 0.0, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init \ No newline at end of file From f2853f48c783b25796574ef2b393c7fb06a01223 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 00:38:30 +0000 Subject: [PATCH 23/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- roboverse_pack/tasks/pick_place/approach_grasp.py | 2 +- roboverse_pack/tasks/pick_place/track_banana.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py index 4a3b0ff02..9ab913675 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp.py @@ -418,4 +418,4 @@ def _get_initial_states(self) -> list[dict] | None: for _ in range(self.num_envs) ] - return init \ No newline at end of file + return init diff --git a/roboverse_pack/tasks/pick_place/track_banana.py b/roboverse_pack/tasks/pick_place/track_banana.py index 324b6b066..ea36a8e83 100644 --- a/roboverse_pack/tasks/pick_place/track_banana.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -415,7 +415,7 @@ def step(self, actions): delta_actions = actions * self._action_scale new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - + # delta_actions = actions * self._action_scale # new_actions = self._last_action + delta_actions From 4fbe0b0ad724052f2cef01c84a60e26bcbaf2a96 Mon Sep 17 00:00:00 2001 From: HanchuZhou Date: Sun, 14 Dec 2025 16:42:38 -0800 Subject: [PATCH 24/37] Roll back readme --- roboverse_pack/tasks/pick_place/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roboverse_pack/tasks/pick_place/README.md b/roboverse_pack/tasks/pick_place/README.md index fb36e4bdb..9c0457e25 100644 --- a/roboverse_pack/tasks/pick_place/README.md +++ b/roboverse_pack/tasks/pick_place/README.md @@ -26,7 +26,7 @@ Evaluate the trained model and collect stable grasp states and first-half trajec ```bash python -m roboverse_learn.rl.fast_td3.evaluate_lift \ - --checkpoint models/spoon_basket/pick_place.approach_grasp_simple_spoon_basket_1065000.pt \ + --checkpoint models/pick_place.approach_grasp_simple_65000.pt \ --target_count 100 \ --state_dir eval_states \ 
--traj_dir eval_trajs From bbd2ba5dfe46ff2d7b67e40f763530821b2daca6 Mon Sep 17 00:00:00 2001 From: HanchuZhou Date: Sun, 14 Dec 2025 16:43:22 -0800 Subject: [PATCH 25/37] roll back --- scripts/advanced/collect_demo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/advanced/collect_demo.py b/scripts/advanced/collect_demo.py index 8bf2e7dad..985bd2323 100644 --- a/scripts/advanced/collect_demo.py +++ b/scripts/advanced/collect_demo.py @@ -688,4 +688,5 @@ def main(): if __name__ == "__main__": + main() From 342a44b0016437f7db745e35cea03af404cf773c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 00:43:34 +0000 Subject: [PATCH 26/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/advanced/collect_demo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/advanced/collect_demo.py b/scripts/advanced/collect_demo.py index 985bd2323..8bf2e7dad 100644 --- a/scripts/advanced/collect_demo.py +++ b/scripts/advanced/collect_demo.py @@ -688,5 +688,4 @@ def main(): if __name__ == "__main__": - main() From 7612ce4c7f5f286b8778c10be5556e45012f3a2e Mon Sep 17 00:00:00 2001 From: HanchuZhou Date: Sun, 14 Dec 2025 16:44:11 -0800 Subject: [PATCH 27/37] roll back collect demo From d8c9b24eaa03f21820b2e268750b05e632b0c10e Mon Sep 17 00:00:00 2001 From: HanchuZhou Date: Sun, 14 Dec 2025 16:45:21 -0800 Subject: [PATCH 28/37] roll back collect demo From c0a8831c720094a011702eb54d3914320fd94936 Mon Sep 17 00:00:00 2001 From: HanchuZhou Date: Sun, 14 Dec 2025 16:47:01 -0800 Subject: [PATCH 29/37] Revise devcontainer.json for development setup Updated the devcontainer configuration with new settings, including service name, workspace folder, and VSCode customizations. 
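
With this configuration, opening the repository in VS Code (or Cursor) and
running the "Dev Containers: Reopen in Container" command should attach to
the `metasim` service referenced from ../docker-compose.yml, with the
workspace opened at the configured workspaceFolder.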
---
 .devcontainer/devcontainer.json | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index cb3c3b638..4820ba2f1 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,8 +1,21 @@
 {
-    "name": "RoboVerse DevContainer",
-    "image": "ubuntu:20.04",
-    "mounts": [
-        "source=${localEnv:HOME}/.ssh,target=/root/.ssh,type=bind"
-    ],
-    "postCreateCommand": "apt update && apt install -y git openssh-client && git config --global user.name 'Boqi Zhao' && git config --global user.email 'your_email@example.com'"
+    "name": "RoboVerse Development Container",
+    "privileged": true,
+    "dockerComposeFile": "../docker-compose.yml",
+    "service": "metasim",
+    "workspaceFolder": "/home/${localEnv:USER}/RoboVerse",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "ms-python.vscode-pylance", // for vscode user
+                "anysphere.cursorpyright", // for cursor user
+                "charliermarsh.ruff"
+            ],
+            "settings": {
+                "terminal.integrated.defaultProfile.linux": "bash",
+                "terminal.integrated.profiles.linux": { "bash": { "path": "/bin/bash" } }
+            }
+        }
+    }
 }

From ae75807ba7f253273fdf21de95336e6b4254de36 Mon Sep 17 00:00:00 2001
From: HanchuZhou
Date: Sun, 14 Dec 2025 16:47:39 -0800
Subject: [PATCH 30/37] Remove devcontainer.json from .gitignore

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index a9da1f783..1a246f924 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,4 +109,3 @@ id_rsa*
 *.pkl
 *.pt
 output
-.devcontainer/devcontainer.json

From 3a5f9ec4c1fea162f2a9d8521951c078275dd3c8 Mon Sep 17 00:00:00 2001
From: Anchor1021
Date: Mon, 15 Dec 2025 21:11:05 -0800
Subject: [PATCH 31/37] Hardcode approach_grasp; roll back rl/fast_td3/train and base.py to pr690; v_max in banana.yaml; isaacgym.py bugfix.

---
 metasim/sim/isaacgym/isaacgym.py              | 4 +-
 .../rl/fast_td3/configs/track_banana.yaml     | 4 +-
 roboverse_learn/rl/fast_td3/train.py          | 43 +-
 .../tasks/pick_place/approach_grasp.py        | 2 +-
 .../tasks/pick_place/approach_grasp_banana.py | 220 ++++-----
 .../pick_place/approach_grasp_screwdriver.py  | 223 ++++-----
 .../tasks/pick_place/approach_grasp_spoon.py  | 434 ++++++++----------
 roboverse_pack/tasks/pick_place/base.py       | 33 +-
 8 files changed, 390 insertions(+), 573 deletions(-)

diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py
index cf0a38050..e1037e612 100644
--- a/metasim/sim/isaacgym/isaacgym.py
+++ b/metasim/sim/isaacgym/isaacgym.py
@@ -774,7 +774,7 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]:

         # Apply GS background rendering if enabled
         # TODO: Render with batch parallelization for efficiency
-        if self.scenario.gs_scene.with_gs_background and self.gs_background is not None:
+        if self.scenario.gs_scene is not None and self.scenario.gs_scene.with_gs_background and self.gs_background is not None:
             assert ROBO_SPLATTER_AVAILABLE, "RoboSplatter is not available. GS background rendering will be disabled."
camera_states = self._apply_gs_background_rendering(camera_states, env_ids) @@ -1244,4 +1244,4 @@ def robot_num_dof(self) -> int: # TODO: try to align handler API and use GymWrapper instead -# IsaacgymEnv: type[EnvWrapper[IsaacgymHandler]] = GymEnvWrapper(IsaacgymHandler) +# IsaacgymEnv: type[EnvWrapper[IsaacgymHandler]] = GymEnvWrapper(IsaacgymHandler) \ No newline at end of file diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml index f5b6f4d5b..ae77d0b0a 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml @@ -10,7 +10,7 @@ robots: ["franka"] task: "pick_place.track_banana" decimation: 4 train_or_eval: "train" -headless: False +headless: True # State file path for track task (pkl file path for grasp states) # If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE @@ -62,7 +62,7 @@ num_atoms: 101 # Value Distribution & Exploration # ------------------------------------------------------------------------------- v_min: 0 -v_max: 600.0 +v_max: 1500.0 policy_noise: 0.001 std_min: 0.001 std_max: 0.4 diff --git a/roboverse_learn/rl/fast_td3/train.py b/roboverse_learn/rl/fast_td3/train.py index 5effe6e06..f74408b48 100644 --- a/roboverse_learn/rl/fast_td3/train.py +++ b/roboverse_learn/rl/fast_td3/train.py @@ -58,11 +58,11 @@ def get_config(): torch.set_float32_matmul_precision("high") -import inspect import numpy as np import torch import torch.nn.functional as F import tqdm +import wandb from loguru import logger as log from tensordict import TensorDict from torch import optim @@ -137,14 +137,11 @@ def main() -> None: scaler = GradScaler(enabled=amp_enabled and amp_dtype == torch.float16) - # Import wandb if enabled - if cfg("use_wandb"): - import wandb - if cfg("train_or_eval") == "train": - wandb.init( - project=cfg("wandb_project", "fttd3_training"), - save_code=True, - ) + if cfg("use_wandb") and cfg("train_or_eval") == "train": + wandb.init( + project=cfg("wandb_project", "fttd3_training"), + save_code=True, + ) random.seed(cfg("seed")) np.random.seed(cfg("seed")) @@ -169,23 +166,9 @@ def main() -> None: scenario = task_cls.scenario.update( robots=cfg("robots"), simulator=cfg("sim"), num_envs=cfg("num_envs"), headless=cfg("headless"), cameras=[] ) - # Check if task class accepts state_file_path parameter (only track tasks do) - init_signature = inspect.signature(task_cls.__init__) - accepts_state_file_path = "state_file_path" in init_signature.parameters - - # Pass state_file_path from config if task accepts it (for track tasks) - state_file_path = cfg("state_file_path", None) - if accepts_state_file_path and state_file_path is not None: - envs = task_cls(scenario, device=device, state_file_path=state_file_path) - else: - envs = task_cls(scenario, device=device) - # Only use viser wrapper if not headless and viser is available - if not cfg("headless"): - try: - from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper - envs = TaskViserWrapper(envs) - except ImportError: - log.warning("Viser not available, skipping visualization wrapper") + envs = task_cls(scenario, device=device) + from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper + envs = TaskViserWrapper(envs) eval_envs = envs # ---------------- derive shapes ------------------------------------ @@ -316,11 +299,7 @@ def render_with_rollout() -> list: scenario_render = scenario.update( robots=robots, simulator=simulator, num_envs=num_envs, 
headless=headless, cameras=cameras ) - # Pass state_file_path from config if task accepts it (for track tasks) - if accepts_state_file_path and state_file_path is not None: - env = task_cls(scenario_render, device=device, state_file_path=state_file_path) - else: - env = task_cls(scenario_render, device=device) + env = task_cls(scenario_render, device=device) obs_normalizer.eval() obs, info = env.reset() @@ -586,4 +565,4 @@ def update_pol(data, logs_dict): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py index 9ab913675..4a3b0ff02 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp.py @@ -418,4 +418,4 @@ def _get_initial_states(self) -> list[dict] | None: for _ in range(self.num_envs) ] - return init + return init \ No newline at end of file diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py index b94c152a3..3b0e1321c 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -6,11 +6,7 @@ from __future__ import annotations -import importlib.util -import os - import torch -from loguru import logger as log from metasim.constants import PhysicStateType from metasim.scenario.objects import RigidObjCfg @@ -153,130 +149,98 @@ class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple): max_episode_steps = 200 def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers - from saved_poses_20251206_banana_basket.py. 
- """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251206_banana_basket.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from object_layout.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Banana as the object to pick - "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), - "table": saved_poses["objects"]["table"], - "lamp": saved_poses["objects"].get("lamp"), - "basket": saved_poses["objects"].get("basket"), - "bowl": saved_poses["objects"].get("bowl"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "spoon": saved_poses["objects"].get("spoon"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251206_banana_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.610000, 0.200000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.610000, -0.300000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.350000, 0.250000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.180000, -0.070000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.530000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.580000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.280000, -0.540000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.320000, -0.490000, 0.910000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.330000, -0.430000, 1.110000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.360000, -0.350000, 1.210000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.600000, -0.310000, 1.190000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + 
"pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813415]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from original approach_grasp_banana.py - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.400000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "lamp": { - "pos": torch.tensor([0.610000, 0.200000, 1.050000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.610000, -0.300000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "bowl": { - "pos": torch.tensor([0.350000, 0.250000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.180000, -0.070000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), - }, - "spoon": { - "pos": torch.tensor([0.530000, -0.690000, 0.850000]), - "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), - }, - "object": { - "pos": torch.tensor([0.280000, -0.580000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.280000, -0.540000, 0.850000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.320000, -0.490000, 0.910000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.330000, -0.430000, 1.110000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.360000, -0.350000, 1.210000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.600000, -0.310000, 1.190000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813415]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": 
saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index a5b8def81..ed56dc954 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -6,11 +6,7 @@ from __future__ import annotations -import importlib.util -import os - import torch -from loguru import logger as log from metasim.constants import PhysicStateType from metasim.scenario.objects import RigidObjCfg @@ -153,133 +149,98 @@ class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): max_episode_steps = 200 def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from object_layout.py. Loads screwdriver, table, basket, and trajectory markers - from saved_poses_20251206_screwdriver_basket.py. - """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251206_screwdriver_basket.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from object_layout.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Screwdriver as the object to pick - "object": saved_poses["objects"].get( - "screwdriver", - saved_poses["objects"].get("screw_driver", saved_poses["objects"].get("object")), - ), - "table": saved_poses["objects"]["table"], - "lamp": saved_poses["objects"].get("lamp"), - "basket": saved_poses["objects"].get("basket"), - "bowl": saved_poses["objects"].get("bowl"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "spoon": saved_poses["objects"].get("spoon"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251206_screwdriver_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.240000, -0.440000, 0.925000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.230000, 
0.090000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "object": { + "pos": torch.tensor([0.590000, -0.360000, 0.811000]), + "rot": torch.tensor([-0.824810, -0.390085, -0.023230, 0.408623]), + }, + "spoon": { + "pos": torch.tensor([0.470000, -0.710000, 0.830000]), + "rot": torch.tensor([0.928125, -0.061844, -0.058501, -0.362397]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.600000, -0.330000, 0.840000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.560000, -0.400000, 1.010000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.530000, -0.400000, 1.140000]), + "rot": torch.tensor([0.962425, -0.271547, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.430000, -0.340000, 1.160000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.260000, -0.400000, 1.170000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813414]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from original approach_grasp_screwdriver.py - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.400000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "lamp": { - "pos": torch.tensor([0.680000, 0.310000, 1.050000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.240000, -0.440000, 0.925000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "bowl": { - "pos": torch.tensor([0.620000, -0.080000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.230000, 0.090000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), - }, - "object": { - "pos": torch.tensor([0.590000, -0.360000, 0.811000]), - "rot": torch.tensor([-0.824810, -0.390085, -0.023230, 0.408623]), - }, - "spoon": { - "pos": torch.tensor([0.470000, -0.710000, 0.830000]), - "rot": torch.tensor([0.928125, -0.061844, -0.058501, -0.362397]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.600000, -0.330000, 0.840000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.560000, -0.400000, 1.010000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.530000, -0.400000, 1.140000]), - "rot": torch.tensor([0.962425, -0.271547, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.430000, -0.340000, 1.160000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.260000, -0.400000, 1.170000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - 
"robots": { - "franka": { - "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813414]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py index ee9f42ced..b83d5f63d 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py @@ -6,11 +6,7 @@ from __future__ import annotations -import importlib.util -import os - import torch -from loguru import logger as log from metasim.constants import PhysicStateType from metasim.scenario.objects import RigidObjCfg @@ -153,130 +149,99 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): max_episode_steps = 200 def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from saved_poses_20251212_spoon.py. Loads spoon, table, basket, and trajectory markers. 
- """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251212_spoon.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from saved_poses_20251212_spoon.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Spoon as the object to pick - "object": saved_poses["objects"].get("spoon"), - "table": saved_poses["objects"].get("table"), - "basket": saved_poses["objects"].get("basket"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "mug": saved_poses["objects"].get("mug"), - "book": saved_poses["objects"].get("book"), - "vase": saved_poses["objects"].get("vase"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251212_spoon.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.210000, 0.250000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.560000, -0.730000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.200000, -0.590000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), + }, + "mug": { + "pos": torch.tensor([0.690000, -0.550000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.700000, 0.280000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.050000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.220000, -0.550000, 0.880000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.220000, -0.310000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.210000, -0.250000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.210000, 0.040000, 1.250000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.190000, 0.250000, 1.010000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.560000, -0.230001, 
0.800000]), + "rot": torch.tensor([0.120502, -0.000001, -0.000001, 0.992712]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from saved_poses_20251212_spoon.py as hardcoded values - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.440000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.210000, 0.250000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.560000, -0.730000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), - }, - "object": { - "pos": torch.tensor([0.200000, -0.590000, 0.820000]), - "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), - }, - "mug": { - "pos": torch.tensor([0.690000, -0.550000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "book": { - "pos": torch.tensor([0.700000, 0.280000, 0.820000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "vase": { - "pos": torch.tensor([0.680000, 0.050000, 0.950000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.220000, -0.550000, 0.880000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.220000, -0.310000, 0.900000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.210000, -0.250000, 1.080000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.210000, 0.040000, 1.250000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.190000, 0.250000, 1.010000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.560000, -0.230001, 0.800000]), - "rot": torch.tensor([0.120502, -0.000001, -0.000001, 0.992712]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["spoon"], + "table": saved_poses["objects"]["table"], + "basket": saved_poses["objects"]["basket"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "mug": saved_poses["objects"]["mug"], + "book": saved_poses["objects"]["book"], + "vase": saved_poses["objects"]["vase"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": 
saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init @@ -290,129 +255,98 @@ class PickPlaceApproachGraspSimpleSpoon2(PickPlaceApproachGraspSimpleSpoon): """ def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from saved_poses_20251212_spoon2.py. Loads spoon, table, basket, and trajectory markers. - """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251212_spoon2.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from saved_poses_20251212_spoon2.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Spoon as the object to pick - "object": saved_poses["objects"].get("spoon"), - "table": saved_poses["objects"].get("table"), - "basket": saved_poses["objects"].get("basket"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "mug": saved_poses["objects"].get("mug"), - "book": saved_poses["objects"].get("book"), - "vase": saved_poses["objects"].get("vase"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251212_spoon2.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.640000, -0.620000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.290000, 0.370000, 0.820000]), + "rot": torch.tensor([0.640996, -0.000000, -0.000000, 0.767544]), + }, + "spoon": { + "pos": torch.tensor([0.140000, -0.370000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), + }, + "mug": { + "pos": torch.tensor([0.520000, 0.250000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.240000, 0.160000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.270000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.150000, -0.350000, 0.860000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.240000, -0.460000, 0.940000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.270000, 
-0.610000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.380000, -0.640000, 1.310000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.670000, -0.620000, 1.020000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.550000, -0.120000, 0.800000]), + "rot": torch.tensor([-0.393287, -0.000001, -0.000000, 0.919414]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from saved_poses_20251212_spoon2.py as hardcoded values - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.440000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.640000, -0.620000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.290000, 0.370000, 0.820000]), - "rot": torch.tensor([0.640996, -0.000000, -0.000000, 0.767544]), - }, - "object": { - "pos": torch.tensor([0.140000, -0.370000, 0.820000]), - "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), - }, - "mug": { - "pos": torch.tensor([0.520000, 0.250000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "book": { - "pos": torch.tensor([0.240000, 0.160000, 0.820000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "vase": { - "pos": torch.tensor([0.680000, 0.270000, 0.950000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.150000, -0.350000, 0.860000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.240000, -0.460000, 0.940000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.270000, -0.610000, 1.080000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.380000, -0.640000, 1.310000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.670000, -0.620000, 1.020000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.550000, -0.120000, 0.800000]), - "rot": torch.tensor([-0.393287, -0.000001, -0.000000, 0.919414]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["spoon"], + "table": saved_poses["objects"]["table"], + "basket": saved_poses["objects"]["basket"], + 
"cutting_tools": saved_poses["objects"]["cutting_tools"], + "mug": saved_poses["objects"]["mug"], + "book": saved_poses["objects"]["book"], + "vase": saved_poses["objects"]["vase"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init diff --git a/roboverse_pack/tasks/pick_place/base.py b/roboverse_pack/tasks/pick_place/base.py index ba472f47f..dcd4dec27 100644 --- a/roboverse_pack/tasks/pick_place/base.py +++ b/roboverse_pack/tasks/pick_place/base.py @@ -336,21 +336,6 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: target_pos = self.waypoint_positions[self.current_waypoint_idx] distance = torch.norm(ee_pos - target_pos, dim=-1) - # Progress reward: reward for reducing distance to waypoint - # This encourages the agent to make progress toward the current waypoint - # Only reward progress if we haven't reached this waypoint yet - not_already_reached = ~self.waypoints_reached[ - torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx - ] - distance_reduction = self.prev_distance_to_waypoint - distance - # Scale progress reward: 0.1 means progress reward is 10% of waypoint reached bonus - # This provides continuous guidance without overwhelming the sparse reward - progress_reward_component = ( - torch.clamp(distance_reduction * self.w_tracking_progress * 0.1, min=0.0) - * not_already_reached.float() - * grasped_mask.float() - ) - # Distance-based reward (far + near) / 2 distance_reward_far = 1 - torch.tanh(1.0 * distance) distance_reward_near = 1 - torch.tanh(10.0 * distance) @@ -373,13 +358,10 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Both distance and rotation must be satisfied to consider as reached reached = distance_reached & rotation_reached - newly_reached = reached & not_already_reached - # Waypoint reached bonus (one-time large reward when reaching a waypoint) - waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress - - # Update prev_distance for next step (only if not newly reached, - # because if newly reached, we'll update it when advancing to next waypoint) - self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] + newly_reached = reached & ( + ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + ) + progress_reward = newly_reached.float() * self.w_tracking_progress if newly_reached.any(): if newly_reached[0]: @@ -423,10 +405,7 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: (1 - torch.tanh(1.0 * distance_to_last[completed_mask])) * self.w_tracking_approach, ) - # Combine all reward components: approach reward + progress reward + waypoint reached bonus - tracking_reward = torch.where( - all_reached, maintain_reward, approach_reward + progress_reward_component + waypoint_reached_bonus - ) + tracking_reward = torch.where(all_reached, maintain_reward, approach_reward + progress_reward) return tracking_reward @@ -670,4 +649,4 @@ def _get_initial_states(self) -> list[dict] | None: for _ in range(self.num_envs) ] - return init + return init \ No newline at end of file From 1d74b67f97ed97a2c9eefb5cf75062252355a2dd Mon Sep 17 00:00:00 2001 From: Anchor1021 
Date: Mon, 15 Dec 2025 21:27:28 -0800
Subject: [PATCH 32/37] Apply pre-commit formatting and lint fixes

---
 metasim/sim/isaacgym/isaacgym.py                  | 8 ++++++--
 roboverse_learn/rl/fast_td3/train.py              | 2 +-
 roboverse_pack/tasks/pick_place/approach_grasp.py | 2 +-
 roboverse_pack/tasks/pick_place/base.py           | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py
index e1037e612..3f6252887 100644
--- a/metasim/sim/isaacgym/isaacgym.py
+++ b/metasim/sim/isaacgym/isaacgym.py
@@ -774,7 +774,11 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]:

         # Apply GS background rendering if enabled
         # TODO: Render with batch parallelization for efficiency
-        if self.scenario.gs_scene is not None and self.scenario.gs_scene.with_gs_background and self.gs_background is not None:
+        if (
+            self.scenario.gs_scene is not None
+            and self.scenario.gs_scene.with_gs_background
+            and self.gs_background is not None
+        ):
             assert ROBO_SPLATTER_AVAILABLE, "RoboSplatter is not available. GS background rendering will be disabled."
             camera_states = self._apply_gs_background_rendering(camera_states, env_ids)

@@ -1244,4 +1248,4 @@ def robot_num_dof(self) -> int:


 # TODO: try to align handler API and use GymWrapper instead
-# IsaacgymEnv: type[EnvWrapper[IsaacgymHandler]] = GymEnvWrapper(IsaacgymHandler)
\ No newline at end of file
+# IsaacgymEnv: type[EnvWrapper[IsaacgymHandler]] = GymEnvWrapper(IsaacgymHandler)
diff --git a/roboverse_learn/rl/fast_td3/train.py b/roboverse_learn/rl/fast_td3/train.py
index f74408b48..dfb03aaab 100644
--- a/roboverse_learn/rl/fast_td3/train.py
+++ b/roboverse_learn/rl/fast_td3/train.py
@@ -565,4 +565,4 @@


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp.py b/roboverse_pack/tasks/pick_place/approach_grasp.py
index 4a3b0ff02..9ab913675 100644
--- a/roboverse_pack/tasks/pick_place/approach_grasp.py
+++ b/roboverse_pack/tasks/pick_place/approach_grasp.py
@@ -418,4 +418,4 @@ def _get_initial_states(self) -> list[dict] | None:
             for _ in range(self.num_envs)
         ]

-    return init
\ No newline at end of file
+    return init
diff --git a/roboverse_pack/tasks/pick_place/base.py b/roboverse_pack/tasks/pick_place/base.py
index dcd4dec27..46e41be62 100644
--- a/roboverse_pack/tasks/pick_place/base.py
+++ b/roboverse_pack/tasks/pick_place/base.py
@@ -649,4 +649,4 @@ def _get_initial_states(self) -> list[dict] | None:
             for _ in range(self.num_envs)
         ]

-    return init
\ No newline at end of file
+    return init

From a9b421ed66dd72f8f4bab0fb7923b636bcf86849 Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Thu, 18 Dec 2025 15:55:51 -0800
Subject: [PATCH 33/37] In evaluate_lift.py, record the state N frames after
 grasping; Add banana2; Set the rotation_tracking weight to 0

---
 roboverse_learn/rl/fast_td3/evaluate.py      |  2 +-
 roboverse_learn/rl/fast_td3/evaluate_lift.py | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/roboverse_learn/rl/fast_td3/evaluate.py b/roboverse_learn/rl/fast_td3/evaluate.py
index 0a2d984a4..f379ee73a 100644
--- a/roboverse_learn/rl/fast_td3/evaluate.py
+++ b/roboverse_learn/rl/fast_td3/evaluate.py
@@ -496,7 +496,7 @@ def main():
     parser.add_argument('--device_rank', type=int, default=0,
                         help='GPU device rank')
-    parser.add_argument('--num_envs', type=int, default=None,
+    parser.add_argument('--num_envs', type=int, default=5,
                         help='Number of 
parallel environments (default: from checkpoint config)')
     parser.add_argument('--headless', action='store_true',
                         help='Run in headless mode')
diff --git a/roboverse_learn/rl/fast_td3/evaluate_lift.py b/roboverse_learn/rl/fast_td3/evaluate_lift.py
index ba613cb31..405d7ba35 100644
--- a/roboverse_learn/rl/fast_td3/evaluate_lift.py
+++ b/roboverse_learn/rl/fast_td3/evaluate_lift.py
@@ -165,11 +165,13 @@ def evaluate_lift_collection(

     lift_start_state = {}
     lift_frame_count = {}
+    lift_capture_recorded = {}
     in_lift_phase = {}
     recording_traj = {}
     for i in range(num_eval_envs):
         lift_start_state[i] = None
         lift_frame_count[i] = 0
+        lift_capture_recorded[i] = False
         in_lift_phase[i] = False
         recording_traj[i] = False

@@ -245,8 +247,9 @@

                 if grasp_success and lift_active and not in_lift_phase[i]:
                     in_lift_phase[i] = True
-                    lift_start_state[i] = extract_state_dict(env, scenario, env_idx=i)
-                    lift_frame_count[i] = 1
+                    lift_start_state[i] = None
+                    lift_frame_count[i] = 0
+                    lift_capture_recorded[i] = False
                     recording_traj[i] = True
                     log.info(f"[Env {i}] Entered lift phase (grasp success and lift active)")

@@ -255,7 +258,12 @@
                 if lift_active and grasp_success:
                     lift_frame_count[i] += 1

-                    if lift_frame_count[i] >= lift_stable_frames:
+                    if (not lift_capture_recorded[i]) and lift_frame_count[i] >= lift_stable_frames:
+                        # Capture the state after holding grasp for lift_stable_frames frames
+                        lift_start_state[i] = extract_state_dict(env, scenario, env_idx=i)
+                        lift_capture_recorded[i] = True
+
+                    if lift_capture_recorded[i] and lift_frame_count[i] >= lift_stable_frames * 2:
                         traj_data = {
                             "init_state": current_episode_init_state[i],
                             "actions": current_episode_actions[i],
@@ -280,6 +288,7 @@

                         lift_start_state[i] = None
                         lift_frame_count[i] = 0
+                        lift_capture_recorded[i] = False
                         in_lift_phase[i] = False
                         recording_traj[i] = False

                         done_masks[i] = True
                 else:
                     lift_frame_count[i] = 0
+                    lift_capture_recorded[i] = False
                     if not grasp_success:
                         in_lift_phase[i] = False
                         recording_traj[i] = False
@@ -303,6 +313,7 @@

                     lift_start_state[i] = None
                     lift_frame_count[i] = 0
+                    lift_capture_recorded[i] = False
                     in_lift_phase[i] = False
                     recording_traj[i] = False
                     current_episode_actions[i] = []
@@ -322,6 +333,7 @@
         for i in range(num_eval_envs):
             lift_start_state[i] = None
             lift_frame_count[i] = 0
+            lift_capture_recorded[i] = False
             in_lift_phase[i] = False
             recording_traj[i] = False
             current_episode_actions[i] = []

From 037e7dc7f3a640ab78a40cd489fcf4094b2237bc Mon Sep 17 00:00:00 2001
From: "zhouhanchu@hotmail.com"
Date: Thu, 18 Dec 2025 15:57:57 -0800
Subject: [PATCH 34/37] Change evaluate_lift.py to make sure the gripper holds
 the object; Add banana2; Set rotation_tracking weight to 0

---
 .../fast_td3/configs/pick_place_banana2.yaml  |  89 ++++
 .../rl/fast_td3/configs/track_banana.yaml     |   2 +-
 .../rl/fast_td3/configs/track_banana2.yaml    |  95 ++++
 .../fast_td3/configs/track_screwdriver.yaml   |   2 +-
 .../tasks/pick_place/approach_grasp_banana.py |   2 +-
 .../pick_place/approach_grasp_banana2.py      | 288 +++++++++++
 .../pick_place/approach_grasp_screwdriver.py  |   2 +-
 .../tasks/pick_place/track_banana.py          |  21 +-
 .../tasks/pick_place/track_banana2.py         | 450 ++++++++++++++++++
 .../tasks/pick_place/track_screwdriver.py     |   4 +-
 10 files changed, 939 insertions(+), 16 deletions(-)
 create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml
 create mode 100644 
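
The evaluate_lift.py change above shifts when the grasp state is captured: instead of snapshotting on the first lifted frame, the loop now counts frames of sustained grasp, records the state once `lift_stable_frames` frames have elapsed, and only saves the trajectory after twice that many. A condensed single-env sketch of that schedule (`lift_stable_frames` stands in for the script's parameter):

```python
def capture_schedule(lift_stable_frames: int, horizon: int):
    """Yield (frame, event) pairs mirroring the capture logic above."""
    recorded = False
    count = 0
    for frame in range(horizon):
        count += 1  # grasp held for another frame
        if not recorded and count >= lift_stable_frames:
            recorded = True
            yield frame, "capture"  # extract_state_dict(...) in the script
        elif recorded and count >= 2 * lift_stable_frames:
            yield frame, "save"     # traj_data written, counters reset
            break

# e.g. with lift_stable_frames=10: "capture" fires at frame 9, "save" at frame 19
print(list(capture_schedule(10, 100)))
```

Any break in the grasp resets `lift_frame_count` and `lift_capture_recorded`, so a recorded state always reflects an uninterrupted hold.
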
roboverse_learn/rl/fast_td3/configs/track_banana2.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana2.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana2.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml new file mode 100644 index 000000000..a962f4aa4 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana2" +decimation: 4 +train_or_eval: "train" +headless: True + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_banana" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana2" # Unique run name +model_dir: "models/banana2" # Separate directory for banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml 
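
For reference, `decimation: 4` in these configs combines with the tasks' `SimParamCfg(dt=0.005)` into a 50 Hz control rate, so the 200-step episodes cover about four seconds of simulated time:

```python
sim_dt = 0.005          # SimParamCfg(dt=0.005) in the task definitions
decimation = 4          # decimation: 4 in this config
max_episode_steps = 200

control_dt = sim_dt * decimation                   # 0.02 s per policy step
control_hz = 1.0 / control_dt                      # 50.0 Hz
episode_seconds = max_episode_steps * control_dt   # 4.0 s
```
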
b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml index f5b6f4d5b..6746cf665 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml @@ -62,7 +62,7 @@ num_atoms: 101 # Value Distribution & Exploration # ------------------------------------------------------------------------------- v_min: 0 -v_max: 600.0 +v_max: 2000.0 policy_noise: 0.001 std_min: 0.001 std_max: 0.4 diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml new file mode 100644 index 000000000..27f64e9c7 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml @@ -0,0 +1,95 @@ +# Configuration for FastTD3 Training - Track Task (Banana) +# Stage 3: Track task for banana object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_banana, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_banana2" +decimation: 4 +train_or_eval: "train" +headless: False + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_simple_banana2_franka_lift_states_103states_20251217_213149.pkl" + + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 2000.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# 
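
Raising `v_max` from 600 to 2000 in the track configs matches the reward scales used by the track tasks: with a one-time `tracking_progress` bonus of 150 per waypoint and a dense approach term bounded by its scale of 4 per step, episode returns can exceed the old 600 bound, and the distributional critic's support `[v_min, v_max]` should cover the largest plausible return. A rough back-of-the-envelope check under those assumptions:

```python
num_waypoints = 5      # traj_marker_0 .. traj_marker_4
w_progress = 150.0     # tracking_progress scale
w_approach = 4.0       # tracking_approach scale; the per-step term is <= this
max_steps = 200

bonus_total = num_waypoints * w_progress   # 750.0 from one-time bonuses
dense_total = max_steps * w_approach       # 800.0 if the dense term saturates
print(bonus_total + dense_total)           # 1550.0, comfortably inside v_max = 2000
```
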
------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track_banana2" +exp_name: "pick_place_track_banana2" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.track_banana2" # Unique run name for banana track task +model_dir: "models/track_banana2" # Separate directory for track banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml index 0c3efbde9..8893290af 100644 --- a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml +++ b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml @@ -62,7 +62,7 @@ num_atoms: 101 # Value Distribution & Exploration # ------------------------------------------------------------------------------- v_min: 0 -v_max: 600.0 +v_max: 2000.0 policy_noise: 0.001 std_min: 0.001 std_max: 0.4 diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py index b94c152a3..eb5d699e1 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -19,7 +19,7 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_banana", "pick_place_approach_grasp_simple_banana") +@register_task("pick_place.approach_grasp_simple_banana", "pick_place.approach_grasp_simple_banana", "pick_place.approach_grasp_banana") class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for banana object. diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py new file mode 100644 index 000000000..18b14e3b8 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py @@ -0,0 +1,288 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task("pick_place.approach_grasp_simple_banana2", "pick_place.approach_grasp_simple_banana2", "pick_place.approach_grasp_banana2") +class PickPlaceApproachGraspSimpleBanana2(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="screw_driver", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="traj_marker_0", + 
urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
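
The loader that follows imports the saved-poses file as a module and reads its `poses` attribute. A minimal hypothetical `saved_poses_*.py` for orientation (the real files under `get_started/output/` are generated by the pose-saving tooling; the values here are taken from the fallback poses hardcoded below):

```python
# Hypothetical get_started/output/saved_poses_YYYYMMDD_example.py
import torch

poses = {
    "objects": {
        "banana": {
            "pos": torch.tensor([0.570000, -0.200000, 0.795000]),
            "rot": torch.tensor([0.644470, -0.113948, 0.110352, 0.747993]),
        },
        # ... table, bowl, mug, book, traj_marker_0 .. traj_marker_4, etc.
    },
    "robots": {
        "franka": {
            "pos": torch.tensor([0.800000, -0.800000, 0.780000]),
            "rot": torch.tensor([1.0, 0.0, 0.0, 0.0]),
            "dof_pos": {"panda_joint1": 0.0},  # abbreviated here
        },
    },
}
```
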
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.600000, 0.310000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.340000, 0.250000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "screw_driver": { + "pos": torch.tensor([0.190000, 0.100000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.350000, 0.270000, 0.860000]), + "rot": torch.tensor([0.952675, -0.005654, 0.187418, 0.239269]), + }, + "object": { + "pos": torch.tensor([0.570000, -0.200000, 0.795000]), + "rot": torch.tensor([0.644470, -0.113948, 0.110352, 0.747993]), + }, + "mug": { + "pos": torch.tensor([0.310000, 0.070000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.210000, -0.480000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.580000, -0.220000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.540000, -0.230000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.440000, -0.310000, 0.970000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": 
torch.tensor([0.310000, -0.380000, 0.950000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.240000, -0.480000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index a5b8def81..ce48860d8 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -19,7 +19,7 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_screwdriver", "pick_place_approach_grasp_simple_screwdriver") +@register_task("pick_place.approach_grasp_simple_screwdriver", "pick_place.approach_grasp_screwdriver", "pick_place_approach_grasp_simple_screwdriver") class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for screwdriver object. diff --git a/roboverse_pack/tasks/pick_place/track_banana.py b/roboverse_pack/tasks/pick_place/track_banana.py index 324b6b066..48628abe8 100644 --- a/roboverse_pack/tasks/pick_place/track_banana.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -129,7 +129,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ "tracking_approach": 4.0, "tracking_progress": 150.0, - "rotation_tracking": 2.0, + "rotation_tracking": 0.0, }) # Remove unused rewards DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) @@ -182,6 +182,15 @@ class PickPlaceTrackBanana(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual banana mesh from EmbodiedGenData (matches object_layout.py) RigidObjCfg( name="object", scale=(1, 1, 1), @@ -190,14 +199,6 @@ class PickPlaceTrackBanana(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", ), - RigidObjCfg( - name="screw_driver", - scale=(1.5, 1.5, 1.5), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", - 
urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", - ), RigidObjCfg( name="spoon", scale=(1, 1, 1), @@ -306,7 +307,7 @@ def __init__(self, scenario, device=None): ] self.reward_weights = [ 1.0, - 1.0, # rotation_tracking weight is already applied inside the function + 0.0, # rotation_tracking weight is already applied inside the function ] def _prepare_states(self, states, env_ids): diff --git a/roboverse_pack/tasks/pick_place/track_banana2.py b/roboverse_pack/tasks/pick_place/track_banana2.py new file mode 100644 index 000000000..26f838bdc --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_banana2.py @@ -0,0 +1,450 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. +""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + 
if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana2", "pick_place_track_banana2") +class PickPlaceTrackBanana2(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. 
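
Together, `load_states_from_pkl` and `convert_state_dict_to_initial_state` turn a recorded pkl into per-env initial states; `_get_initial_states` then tiles them across environments (e.g. 103 saved states become 400 envs as 103 × 3 + 91). A minimal usage sketch, assuming the two helpers above are in scope and the path comes from the track config's `state_file_path`:

```python
import torch

pkl_path = "eval_states/pick_place.approach_grasp_simple_banana2_franka_lift_states_103states_20251217_213149.pkl"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

states = load_states_from_pkl(pkl_path)
initial_states = [
    convert_state_dict_to_initial_state(s, device, robot_name="franka")
    for s in states
]
```
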
+ """ + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual banana mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + 
urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+            mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+            usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+            scale=0.2,
+            physics=PhysicStateType.XFORM,
+            enabled_gravity=False,
+            collision_enabled=False,
+            fix_base_link=True,
+        ),
+        RigidObjCfg(
+            name="traj_marker_2",
+            urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+            mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+            usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+            scale=0.2,
+            physics=PhysicStateType.XFORM,
+            enabled_gravity=False,
+            collision_enabled=False,
+            fix_base_link=True,
+        ),
+        RigidObjCfg(
+            name="traj_marker_3",
+            urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+            mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+            usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+            scale=0.2,
+            physics=PhysicStateType.XFORM,
+            enabled_gravity=False,
+            collision_enabled=False,
+            fix_base_link=True,
+        ),
+        RigidObjCfg(
+            name="traj_marker_4",
+            urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+            mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+            usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+            scale=0.2,
+            physics=PhysicStateType.XFORM,
+            enabled_gravity=False,
+            collision_enabled=False,
+            fix_base_link=True,
+        ),
+    ],
+    robots=["franka"],
+    sim_params=SimParamCfg(
+        dt=0.005,
+    ),
+    decimation=4,
+    )
+    max_episode_steps = 200
+
+    def __init__(self, scenario, device=None):
+        # Start from current file and walk upward to find RoboVerse root
+        root = Path(__file__).resolve()
+        while root != root.parent:
+            if (root / "roboverse_learn").exists():
+                roboverseroot = root
+                break
+            root = root.parent
+        else:
+            raise RuntimeError("Could not locate RoboVerse root directory")
+
+        # Now construct full path to this task's own YAML config
+        config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana2.yaml"
+
+        with open(config_path) as f:
+            cfg = yaml.safe_load(f)
+
+        self.state_file_path = cfg["state_file_path"]
+        self._loaded_states = None
+
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self._device = device
+
+        self.object_grasped = None
+
+        super().__init__(scenario, device)
+
+        self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device)
+        self.reward_functions = [
+            self._reward_trajectory_tracking,
+            self._reward_rotation_tracking,
+        ]
+        self.reward_weights = [
+            1.0,
+            0.0,  # rotation_tracking weight is already applied inside the function
+        ]
+
+    def _prepare_states(self, states, env_ids):
+        """Override to disable randomization for track task."""
+        return states
+
+    def _get_initial_states(self) -> list[dict] | None:
+        """Load initial states from pkl file."""
+        if self._loaded_states is not None:
+            return self._loaded_states
+
+        states_list = load_states_from_pkl(self.state_file_path)
+
+        device = getattr(self, "_device", None) or getattr(self, "device", None)
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+        initial_states = []
+        robot_name = "franka"
+        for state_dict in states_list:
+            initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name)
+            initial_states.append(initial_state)
+
+        if len(initial_states) < self.num_envs:
+            k = self.num_envs // len(initial_states)
+            remainder = self.num_envs % len(initial_states)
+            initial_states = initial_states * k + 
initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos + delta_actions = actions * self._action_scale + new_actions = current_joint_pos + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions + + gripper_value_closed = torch.tensor(0.0, device=self.device, 
dtype=real_actions.dtype) + real_actions[:, 0] = gripper_value_closed + real_actions[:, 1] = gripper_value_closed + + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + updated_states = self.handler.get_states(mode="tensor") + box_pos = updated_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(updated_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + is_grasping = gripper_box_dist < self.grasp_check_distance + + self.object_grasped = is_grasping + newly_released = ~is_grasping + + if newly_released.any() and newly_released[0]: + log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m") + + terminated = terminated | newly_released + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info diff --git a/roboverse_pack/tasks/pick_place/track_screwdriver.py b/roboverse_pack/tasks/pick_place/track_screwdriver.py index 543aaafb0..3b49b7761 100644 --- a/roboverse_pack/tasks/pick_place/track_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/track_screwdriver.py @@ -129,7 +129,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ "tracking_approach": 4.0, "tracking_progress": 150.0, - "rotation_tracking": 2.0, + "rotation_tracking": 0.0, }) # Remove unused rewards DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) @@ -306,7 +306,7 @@ def __init__(self, scenario, device=None): ] self.reward_weights = [ 1.0, - 1.0, # rotation_tracking weight is already applied inside the function + 0.0, # rotation_tracking weight is already applied inside the function ] self.grasp_check_distance = 0.25 From 2beb889a9e9b7114e3e3cad75b0ce4345b934981 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Dec 2025 23:58:41 +0000 Subject: [PATCH 35/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- roboverse_pack/tasks/pick_place/approach_grasp_banana.py | 6 +++++- roboverse_pack/tasks/pick_place/approach_grasp_banana2.py | 6 +++++- .../tasks/pick_place/approach_grasp_screwdriver.py | 6 +++++- roboverse_pack/tasks/pick_place/track_banana2.py | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py index cfad05b56..966d341ab 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py @@ -15,7 +15,11 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_banana", "pick_place.approach_grasp_simple_banana", "pick_place.approach_grasp_banana") +@register_task( + "pick_place.approach_grasp_simple_banana", + "pick_place.approach_grasp_simple_banana", + "pick_place.approach_grasp_banana", +) class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for banana object. 
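
The track tasks' `step()` override shown above commands both finger joints shut every step and terminates an episode as soon as the object drifts beyond `grasp_check_distance` of the end effector. A condensed sketch of that release check:

```python
import torch

def detect_release(
    gripper_pos: torch.Tensor,    # (num_envs, 3) end-effector position
    object_pos: torch.Tensor,     # (num_envs, 3) object root position
    grasp_check_distance: float,  # e.g. 0.25 in the screwdriver track task
) -> torch.Tensor:
    """Boolean mask of envs where the object slipped out of the gripper."""
    dist = torch.norm(gripper_pos - object_pos, dim=-1)
    return dist >= grasp_check_distance  # released envs get terminated
```
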
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py index 18b14e3b8..586cb93fd 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py @@ -19,7 +19,11 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_banana2", "pick_place.approach_grasp_simple_banana2", "pick_place.approach_grasp_banana2") +@register_task( + "pick_place.approach_grasp_simple_banana2", + "pick_place.approach_grasp_simple_banana2", + "pick_place.approach_grasp_banana2", +) class PickPlaceApproachGraspSimpleBanana2(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for banana object. diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index 7913bf789..e79fbd05d 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -15,7 +15,11 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_screwdriver", "pick_place.approach_grasp_screwdriver", "pick_place_approach_grasp_simple_screwdriver") +@register_task( + "pick_place.approach_grasp_simple_screwdriver", + "pick_place.approach_grasp_screwdriver", + "pick_place_approach_grasp_simple_screwdriver", +) class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for screwdriver object. diff --git a/roboverse_pack/tasks/pick_place/track_banana2.py b/roboverse_pack/tasks/pick_place/track_banana2.py index 26f838bdc..de3fa5da4 100644 --- a/roboverse_pack/tasks/pick_place/track_banana2.py +++ b/roboverse_pack/tasks/pick_place/track_banana2.py @@ -416,7 +416,7 @@ def step(self, actions): delta_actions = actions * self._action_scale new_actions = current_joint_pos + delta_actions real_actions = torch.clamp(new_actions, self._action_low, self._action_high) - + # delta_actions = actions * self._action_scale # new_actions = self._last_action + delta_actions From 6a0cf993b1daf853a26807903fedb10bc5e415a8 Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Thu, 18 Dec 2025 17:14:17 -0800 Subject: [PATCH 36/37] Add several new tasks --- .../fast_td3/configs/pick_place_banana3.yaml | 89 ++++ .../configs/pick_place_cuttingtool2.yaml | 89 ++++ .../configs/pick_place_cuttingtool3.yaml | 89 ++++ .../rl/fast_td3/configs/track_banana3.yaml | 94 ++++ .../fast_td3/configs/track_cuttingtool2.yaml | 95 ++++ .../fast_td3/configs/track_cuttingtool3.yaml | 95 ++++ .../pick_place/approach_grasp_banana3.py | 281 +++++++++++ .../pick_place/approach_grasp_cuttingtool2.py | 245 ++++++++++ .../pick_place/approach_grasp_cuttingtool3.py | 245 ++++++++++ .../tasks/pick_place/track_banana3.py | 445 ++++++++++++++++++ .../tasks/pick_place/track_cuttingtool2.py | 445 ++++++++++++++++++ .../tasks/pick_place/track_cuttingtool3.py | 445 ++++++++++++++++++ 12 files changed, 2657 insertions(+) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/track_banana3.yaml create mode 100644 
roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana3.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana3.py create mode 100644 roboverse_pack/tasks/pick_place/track_cuttingtool2.py create mode 100644 roboverse_pack/tasks/pick_place/track_cuttingtool3.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml new file mode 100644 index 000000000..58ce46335 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana3" +decimation: 4 +train_or_eval: "train" +headless: True + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" 
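+# Note on the value-distribution block above: the distributional critic keeps
+# num_atoms = 101 atoms on [v_min, v_max]; assuming a C51-style uniform grid,
+# the atom spacing here is (600.0 - 0) / (101 - 1) = 6.0 return units.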
+exp_name: "get_started_fttd3_banana3" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana3" # Unique run name +model_dir: "models/banana3" # Separate directory for banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml new file mode 100644 index 000000000..4b92f2fc8 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Cutting Tool +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_cuttingtool2" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_cuttingtool2" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_cuttingtool2" # Unique run name +model_dir: "models/cuttingtool2" # Separate directory for cuttingtool2 checkpoints +eval_interval: 
5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml new file mode 100644 index 000000000..59597950b --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Cutting Tool +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_cuttingtool3" +decimation: 4 +train_or_eval: "train" +headless: False + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_cuttingtool3" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_cuttingtool3" # Unique run name +model_dir: "models/cuttingtool3" # Separate directory for cuttingtool3 checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml new file mode 100644 index 000000000..a0925b38f --- 
/dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml @@ -0,0 +1,94 @@ +# Configuration for FastTD3 Training - Track Task (Banana) +# Stage 3: Track task for banana object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_banana, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_banana3" +decimation: 4 +train_or_eval: "train" +headless: False + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_simple_banana3_franka_lift_states_103states_20251217_213149.pkl" + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 2000.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track_banana3" +exp_name: "pick_place_track_banana3" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.track_banana3" # Unique run name for banana track task +model_dir: "models/track_banana3" # Separate directory for track banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml 
b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml new file mode 100644 index 000000000..809538389 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml @@ -0,0 +1,95 @@ +# Configuration for FastTD3 Training - Track Task (Cutting Tool) +# Stage 3: Track task for cutting tool object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_cuttingtool2, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_cuttingtool2" +decimation: 4 +train_or_eval: "train" +headless: True + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_cuttingtool2_franka_lift_states_100states_20251210_153518.pkl" + + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 1500.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track_cuttingtool2" +exp_name: "pick_place_track_cuttingtool2" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.track_cuttingtool2" # Unique run name for cuttingtool2 track task +model_dir: "models/track_cuttingtool2" # Separate directory for track cuttingtool2 checkpoints 
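+# Note on the replay block above: batch_size (32768) exceeds buffer_size
+# (20480). That is only consistent if capacity is counted per environment
+# (400 envs here) or minibatches are drawn with replacement, as in
+# FastTD3-style replay buffers.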
+eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml new file mode 100644 index 000000000..e17e45a75 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml @@ -0,0 +1,95 @@ +# Configuration for FastTD3 Training - Track Task (Cutting Tool) +# Stage 3: Track task for cutting tool object - for training trajectory tracking +# Starts from saved grasp states from approach_grasp_simple_cuttingtool2, only trains trajectory tracking + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.track_cuttingtool3" +decimation: 4 +train_or_eval: "train" +headless: True + +# State file path for track task (pkl file path for grasp states) +# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE +state_file_path: "eval_states/pick_place.approach_grasp_cuttingtool3_franka_lift_states_100states_20251210_153518.pkl" + + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 1500.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "pick_place_track_cuttingtool3" +exp_name: "pick_place_track_cuttingtool3" +use_wandb: false +checkpoint_path: null +run_name: 
"pick_place.track_cuttingtool3" # Unique run name for cuttingtool3 track task +model_dir: "models/track_cuttingtool3" # Separate directory for track cuttingtool3 checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py new file mode 100644 index 000000000..68ddfa0c2 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py @@ -0,0 +1,281 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana3", + "pick_place.approach_grasp_simple_banana3", + "pick_place.approach_grasp_banana3", +) +class PickPlaceApproachGraspSimpleBanana3(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + 
mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.140000, -0.090000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.240000, -0.630000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "mug": { + "pos": torch.tensor([0.540000, -0.710000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.620000, -0.120000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([0.911038, 0.000000, -0.000000, 0.412321]), + }, + "vase": { + "pos": torch.tensor([0.300000, 0.050000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.220000, -0.630000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.270000, -0.560000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.360000, -0.470000, 0.920000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.390000, 0.900000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + 
"pos": torch.tensor([0.450000, -0.310000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.519098, -0.000000, -0.000001, 0.854714]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py new file mode 100644 index 000000000..13a9e9480 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py @@ -0,0 +1,245 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import torch + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_cuttingtool2", + "pick_place.approach_grasp_simple_cuttingtool2", + "pick_place.approach_grasp_cuttingtool2", +) +class PickPlaceApproachGraspSimpleCuttingTool2(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for cutting tool object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses cutting tool mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_cuttingtool2_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", 
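+                # Marker assets are visual-only waypoints: XFORM physics with
+                # gravity and collisions disabled, so they never perturb the scene.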
+ mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Hardcoded initial states from saved_poses_20251206_banana_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.530000, 0.180000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.570000, -0.330000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.220000, -0.020000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "mug": { + "pos": torch.tensor([0.500000, -0.710000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.220000, -0.520000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.260000, -0.270000, 0.811000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.510000, -0.290000, 0.830000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.480000, -0.340000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.360000, -0.410000, 0.930000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.250000, -0.470000, 0.930000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.230000, -0.520000, 0.890000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.640796, 0.019186, 0.016023, 0.767304]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + 
"panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py new file mode 100644 index 000000000..51e6149f9 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py @@ -0,0 +1,245 @@ +"""Stage 1: Simple Approach and Grasp task for cutting tool object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the cutting tool object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import torch + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_cuttingtool3", + "pick_place.approach_grasp_simple_cuttingtool3", + "pick_place.approach_grasp_cuttingtool3", +) +class PickPlaceApproachGraspSimpleCuttingTool3(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for cutting tool object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses cutting tool mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_cuttingtool3_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", 
+ mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Hardcoded initial states from saved_poses_20251206_banana_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.330000, 0.160000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.320000, -0.760000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.580000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "mug": { + "pos": torch.tensor([0.570000, 0.040000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.550000, -0.280000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.170000, -0.190000, 0.811000]), + "rot": torch.tensor([0.921060, 0.000000, -0.000000, 0.389418]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.230000, -0.680000, 0.830000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.240000, -0.620000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.290000, -0.560000, 0.920000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.340000, -0.460000, 0.930000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.550000, -0.290000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.640796, 0.019186, 0.016023, 0.767304]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + 
"panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + + return init diff --git a/roboverse_pack/tasks/pick_place/track_banana3.py b/roboverse_pack/tasks/pick_place/track_banana3.py new file mode 100644 index 000000000..bece9763c --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_banana3.py @@ -0,0 +1,445 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. +""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = 
torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana3", "pick_place_track_banana3") +class PickPlaceTrackBanana3(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. 
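+
+    The pkl path comes from state_file_path in the track_banana3 YAML config,
+    which is resolved and read in __init__ below.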
+ """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + 
usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + 
torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device),
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device),
+        ]
+
+        for env_idx, initial_state in enumerate(initial_states):
+            if "objects" not in initial_state:
+                initial_state["objects"] = {}
+
+            for i in range(self.num_waypoints):
+                marker_name = f"traj_marker_{i}"
+                if marker_name not in initial_state["objects"]:
+                    if i < len(default_positions):
+                        initial_state["objects"][marker_name] = {
+                            "pos": default_positions[i].clone(),
+                            "rot": default_rotations[i].clone(),
+                        }
+
+        self._loaded_states = initial_states
+        log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}")
+        return initial_states
+
+    def reset(self, env_ids=None):
+        """Reset environment, keeping object grasped."""
+        if env_ids is None or self._last_action is None:
+            self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        states = self.handler.get_states()
+        if env_ids is None:
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
self.object_grasped = is_grasping + newly_released = ~is_grasping + + if newly_released.any() and newly_released[0]: + log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m") + + terminated = terminated | newly_released + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info diff --git a/roboverse_pack/tasks/pick_place/track_cuttingtool2.py b/roboverse_pack/tasks/pick_place/track_cuttingtool2.py new file mode 100644 index 000000000..b74f94121 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_cuttingtool2.py @@ -0,0 +1,445 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. +""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = 
entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_cuttingtool2", "pick_place_track_cuttingtool2") +class PickPlaceTrackCuttingTool2(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + 
name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + 
self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        states = self.handler.get_states()
+        if env_ids is None:
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        newly_released = ~is_grasping
+
+        if newly_released.any() and newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_cuttingtool3.py b/roboverse_pack/tasks/pick_place/track_cuttingtool3.py
new file mode 100644
index 000000000..2c2278579
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_cuttingtool3.py
@@ -0,0 +1,445 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+Object is already grasped, only needs to learn trajectory following.
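+
+Each saved state is expected to look roughly like (sketch):
+
+    {
+        "objects": {"object": {"pos": [...], "rot": [...]}, ...},
+        "robots": {"franka": {"pos": [...], "rot": [...], "dof_pos": {...}}},
+    }
+
+A flat {entity_name: {"pos": ..., "rot": ...}} layout is also accepted.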
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+    "tracking_approach": 4.0,
+    "tracking_progress": 150.0,
+    "rotation_tracking": 0.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+
+
+@register_task("pick_place.track_cuttingtool3", "pick_place_track_cuttingtool3")
+class PickPlaceTrackCuttingTool3(PickPlaceBase):
+    """Trajectory tracking task from grasp states.
+
+    Assumes the object is already grasped and only learns trajectory following.
+    Initial states are loaded from a pkl file.
+    """
+
+    scenario = ScenarioCfg(
+        objects=[
+            # EmbodiedGen Assets - Put Cutting Tool in Basket Scene
+            RigidObjCfg(
+                name="table",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml",
+            ),
+            RigidObjCfg(
+                name="basket",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml",
+            ),
+            RigidObjCfg(
+                name="object",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml",
+            ),
+            RigidObjCfg(
+                name="spoon",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml",
+            ),
+            RigidObjCfg(
+                name="mug",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml",
+            ),
+            RigidObjCfg(
+                name="book",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml",
+            ),
+            RigidObjCfg(
+                name="remote_control",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd",
urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or 
getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = 
self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos + delta_actions = actions * self._action_scale + new_actions = current_joint_pos + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions + + # delta_actions = actions * self._action_scale + # new_actions = self._last_action + delta_actions + + gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) + real_actions[:, 0] = gripper_value_closed + real_actions[:, 1] = gripper_value_closed + + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + updated_states = self.handler.get_states(mode="tensor") + box_pos = updated_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(updated_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + is_grasping = gripper_box_dist < self.grasp_check_distance + + self.object_grasped = is_grasping + newly_released = ~is_grasping + + if newly_released.any() and newly_released[0]: + log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m") + + terminated = terminated | newly_released + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info From 3fceee9fa2e1c1df55c346092079235c3e2931aa Mon Sep 17 00:00:00 2001 From: "zhouhanchu@hotmail.com" Date: Wed, 14 Jan 2026 15:43:07 -0800 Subject: [PATCH 37/37] add single banana tasks --- .../fast_td3/configs/pick_place_banana4.yaml | 89 ++++ .../fast_td3/configs/pick_place_banana5.yaml | 89 ++++ .../fast_td3/configs/pick_place_banana6.yaml | 89 ++++ .../fast_td3/configs/pick_place_banana7.yaml | 89 ++++ .../pick_place/approach_grasp_banana4.py | 221 ++++++++++ .../pick_place/approach_grasp_banana5.py | 221 ++++++++++ .../pick_place/approach_grasp_banana6.py | 221 ++++++++++ .../pick_place/approach_grasp_banana7.py | 221 ++++++++++ .../tasks/pick_place/track_banana4.py | 405 ++++++++++++++++++ .../tasks/pick_place/track_banana5.py | 405 ++++++++++++++++++ .../tasks/pick_place/track_banana6.py | 405 ++++++++++++++++++ .../tasks/pick_place/track_banana7.py | 405 ++++++++++++++++++ 12 files changed, 2860 insertions(+) create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml create mode 100644 roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana4.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana5.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana6.py create mode 100644 roboverse_pack/tasks/pick_place/approach_grasp_banana7.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana4.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana5.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana6.py create mode 100644 roboverse_pack/tasks/pick_place/track_banana7.py diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml new file mode 100644 index 000000000..600aa560d --- /dev/null +++ 
b/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana4" +decimation: 4 +train_or_eval: "train" +headless: True + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_banana4" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana4" # Unique run name +model_dir: "models/banana4" # Separate directory for banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml new file mode 100644 index 000000000..79980bdb5 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# 
Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana5" +decimation: 4 +train_or_eval: "train" +headless: True + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_banana5" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana5" # Unique run name +model_dir: "models/banana5" # Separate directory for banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml new file mode 100644 index 000000000..4e1d10e27 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana6" +decimation: 4 +train_or_eval: "train" +headless: True + +# 
------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# ------------------------------------------------------------------------------- +# Rollout & Timesteps +# ------------------------------------------------------------------------------- +num_envs: 400 +num_eval_envs: 400 +total_timesteps: 2000000 +learning_starts: 10 +num_steps: 1 + +# ------------------------------------------------------------------------------- +# Replay, Batching, Discounting +# ------------------------------------------------------------------------------- +buffer_size: 20480 +batch_size: 32768 +gamma: 0.99 +tau: 0.1 + +# ------------------------------------------------------------------------------- +# Update Schedule +# ------------------------------------------------------------------------------- +policy_frequency: 2 +num_updates: 5 +# ------------------------------------------------------------------------------- +# Optimizer & Network +# ------------------------------------------------------------------------------- +critic_learning_rate: 0.0003 +actor_learning_rate: 0.0003 +weight_decay: 0.1 +critic_hidden_dim: 512 +actor_hidden_dim: 256 +init_scale: 0.01 +num_atoms: 101 + +# ------------------------------------------------------------------------------- +# Value Distribution & Exploration +# ------------------------------------------------------------------------------- +v_min: 0 +v_max: 600.0 +policy_noise: 0.001 +std_min: 0.001 +std_max: 0.4 +noise_clip: 0.5 + +# ------------------------------------------------------------------------------- +# Algorithm Flags +# ------------------------------------------------------------------------------- +use_cdq: true +compile: true +obs_normalization: true +max_grad_norm: 0.0 +amp: true +amp_dtype: "fp16" +disable_bootstrap: false +measure_burnin: 3 + +# ------------------------------------------------------------------------------- +# Logging & Checkpointing +# ------------------------------------------------------------------------------- +wandb_project: "get_started_fttd3" +exp_name: "get_started_fttd3_banana6" +use_wandb: false +checkpoint_path: null +run_name: "pick_place.approach_grasp_simple_banana6" # Unique run name +model_dir: "models/banana6" # Separate directory for banana checkpoints +eval_interval: 5000 +save_interval: 5000 +video_width: 1024 +video_height: 1024 diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml new file mode 100644 index 000000000..f9ea92a86 --- /dev/null +++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml @@ -0,0 +1,89 @@ +# Base Configuration for FastTD3 Training - Banana +# Default configuration with IsaacGym simulator and Franka robot + +# ------------------------------------------------------------------------------- +# Environment +# ------------------------------------------------------------------------------- +sim: "isaacgym" +robots: ["franka"] +task: "pick_place.approach_grasp_simple_banana7" +decimation: 4 +train_or_eval: "train" +headless: True + +# ------------------------------------------------------------------------------- +# Seeds & Device +# ------------------------------------------------------------------------------- +seed: 1 +cuda: true +torch_deterministic: true +device_rank: 0 + +# 
-------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana7"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana7" # Unique run name
+model_dir: "models/banana7" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py
new file mode 100644
index 000000000..19ec00d6a
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py
@@ -0,0 +1,221 @@
+"""Stage 1: Simple Approach and Grasp task for banana object.
+
+This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object
+with specific mesh configurations and saved poses from object_layout.py.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import os
+
+import torch
+from loguru import logger as log
+
+from metasim.constants import PhysicStateType
+from metasim.scenario.objects import RigidObjCfg
+from metasim.scenario.scenario import ScenarioCfg, SimParamCfg
+from metasim.task.registry import register_task
+from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple
+
+
+@register_task(
+    "pick_place.approach_grasp_simple_banana4",
+    "pick_place.approach_grasp_banana4",
+)
+class PickPlaceApproachGraspSimpleBanana4(PickPlaceApproachGraspSimple):
+    """Simple Approach and Grasp task for banana object.
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
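+
+        For reference, the saved-poses module is assumed to expose a ``poses``
+        dict shaped roughly as below (values illustrative, not the actual
+        generated contents):
+
+            poses = {
+                "objects": {
+                    "banana": {"pos": torch.tensor([...]), "rot": torch.tensor([...])},
+                    "table": {"pos": torch.tensor([...]), "rot": torch.tensor([...])},
+                },
+                "robots": {
+                    "franka": {"pos": ..., "rot": ..., "dof_pos": {...}},
+                },
+            }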
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.580000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py 
b/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py new file mode 100644 index 000000000..277684e11 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana5", + "pick_place.approach_grasp_simple_banana5", + "pick_place.approach_grasp_banana5", +) +class PickPlaceApproachGraspSimpleBanana5(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + 
RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.430000, 0.010000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": 
torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py new file mode 100644 index 000000000..dd0fb8179 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana6", + "pick_place.approach_grasp_simple_banana6", + "pick_place.approach_grasp_banana6", +) +class PickPlaceApproachGraspSimpleBanana6(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
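+
+        Returns one state dict per environment; a minimal sketch of the shape
+        produced (values illustrative):
+
+            [
+                {
+                    "objects": {"object": {"pos": ..., "rot": ...}, "table": {...}},
+                    "robots": {"franka": {"pos": ..., "rot": ..., "dof_pos": {...}}},
+                },
+                ...
+            ]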
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.220000, 0.090000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py 
b/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py new file mode 100644 index 000000000..834f7b596 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana7", + "pick_place.approach_grasp_simple_banana7", + "pick_place.approach_grasp_banana7", +) +class PickPlaceApproachGraspSimpleBanana7(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + 
RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.550000, -0.510000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": 
torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/track_banana4.py b/roboverse_pack/tasks/pick_place/track_banana4.py new file mode 100644 index 000000000..6531b386e --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_banana4.py @@ -0,0 +1,405 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. +""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to 
nested
+        for name, entity_state in state_dict.items():
+            if name in ["objects", "robots"]:
+                continue
+
+            pos = entity_state.get("pos")
+            rot = entity_state.get("rot")
+
+            # np.ndarray is already covered by the first isinstance check
+            if isinstance(pos, (list, tuple, np.ndarray)):
+                pos = torch.tensor(pos, device=device, dtype=torch.float32)
+            elif isinstance(pos, torch.Tensor):
+                pos = pos.to(device).float()
+
+            if isinstance(rot, (list, tuple, np.ndarray)):
+                rot = torch.tensor(rot, device=device, dtype=torch.float32)
+            elif isinstance(rot, torch.Tensor):
+                rot = rot.to(device).float()
+
+            entity_entry = {
+                "pos": pos,
+                "rot": rot,
+            }
+
+            if "dof_pos" in entity_state:
+                entity_entry["dof_pos"] = entity_state["dof_pos"]
+
+            if name == robot_name:
+                initial_state["robots"][name] = entity_entry
+            else:
+                initial_state["objects"][name] = entity_entry
+
+    return initial_state
+
+
+DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+    "tracking_approach": 4.0,
+    "tracking_progress": 150.0,
+    "rotation_tracking": 0.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+
+
+@register_task("pick_place.track_banana4", "pick_place_track_banana4")
+class PickPlaceTrackBanana4(PickPlaceBase):
+    """Trajectory tracking task from grasp states.
+
+    Assumes the object is already grasped; only trajectory following is learned.
+    Initial states are loaded from a pkl file.
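+
+    The pkl file is expected to hold a list of per-env state dicts, either
+    nested ({"objects": {...}, "robots": {...}}) or flat ({entity_name:
+    {"pos", "rot", "dof_pos"}}); both forms are normalized by
+    convert_state_dict_to_initial_state above.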
+ """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + 
self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = 
list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Force the two finger joints (action dims 0 and 1) to stay closed
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana5.py b/roboverse_pack/tasks/pick_place/track_banana5.py
new file mode 100644
index 000000000..4127a2a80
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana5.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana5", "pick_place_track_banana5") +class PickPlaceTrackBanana5(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
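+        # state_file_path comes from the training YAML instead of being
+        # hard-coded; walking up from __file__ keeps the lookup independent
+        # of the current working directory.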
+ # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + 
self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        states = self.handler.get_states()
+        if env_ids is None:
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Force the two finger joints (action dims 0 and 1) to stay closed
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana6.py b/roboverse_pack/tasks/pick_place/track_banana6.py
new file mode 100644
index 000000000..31586b126
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana6.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana6", "pick_place_track_banana6") +class PickPlaceTrackBanana6(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
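+        # NOTE: the grasp-state pkl path is read from the shared fast_td3 YAML
+        # config instead of being hard-coded here, so every track_banana variant
+        # follows the same collected states.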
+        # Walk upward from this file to find the RoboVerse root: the first
+        # ancestor directory that contains `roboverse_learn`.
+        root = Path(__file__).resolve()
+        while root != root.parent:
+            if (root / "roboverse_learn").exists():
+                roboverse_root = root
+                break
+            root = root.parent
+        else:
+            # The loop ran to the filesystem root without a break.
+            raise RuntimeError("Could not locate RoboVerse root directory")
+
+        # Construct the full path to the shared YAML config.
+        config_path = roboverse_root / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml"
+
+        with open(config_path) as f:
+            cfg = yaml.safe_load(f)
+
+        self.state_file_path = cfg["state_file_path"]
+        self._loaded_states = None
+
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self._device = device
+
+        self.object_grasped = None
+
+        super().__init__(scenario, device)
+
+        self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device)
+        self.reward_functions = [
+            self._reward_trajectory_tracking,
+            self._reward_rotation_tracking,
+        ]
+        self.reward_weights = [
+            1.0,
+            0.0,  # rotation_tracking weight is already applied inside the function
+        ]
+
+    def _prepare_states(self, states, env_ids):
+        """Override to disable randomization for the track task."""
+        return states
+
+    def _get_initial_states(self) -> list[dict] | None:
+        """Load initial states from the pkl file, caching them after the first load."""
+        if self._loaded_states is not None:
+            return self._loaded_states
+
+        states_list = load_states_from_pkl(self.state_file_path)
+
+        device = getattr(self, "_device", None) or getattr(self, "device", None)
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+        initial_states = []
+        robot_name = "franka"
+        for state_dict in states_list:
+            initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name)
+            initial_states.append(initial_state)
+
+        # Tile the loaded states so every environment receives one.
+        if len(initial_states) < self.num_envs:
+            k = self.num_envs // len(initial_states)
+            remainder = self.num_envs % len(initial_states)
+            initial_states = initial_states * k + initial_states[:remainder]
+
+        initial_states = initial_states[: self.num_envs]
+
+        # Default waypoint poses, used when a saved state lacks the markers.
+        default_positions = [
+            torch.tensor([0.610000, -0.280000, 0.150000], device=device),
+            torch.tensor([0.600000, -0.190000, 0.220000], device=device),
+            torch.tensor([0.560000, -0.110000, 0.360000], device=device),
+            torch.tensor([0.530000, 0.010000, 0.470000], device=device),
+            torch.tensor([0.510000, 0.130000, 0.460000], device=device),
+        ]
+        default_rotations = [
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device),
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device),
+        ]
+
+        for initial_state in initial_states:
+            if "objects" not in initial_state:
+                initial_state["objects"] = {}
+
+            for i in range(self.num_waypoints):
+                marker_name = f"traj_marker_{i}"
+                if marker_name not in initial_state["objects"] and i < len(default_positions):
+                    initial_state["objects"][marker_name] = {
+                        "pos": default_positions[i].clone(),
+                        "rot": default_rotations[i].clone(),
+                    }
+
+        self._loaded_states = initial_states
+        log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}")
+        return initial_states
+
+    def reset(self, env_ids=None):
+        """Reset the environment, keeping the object grasped."""
+        if env_ids is None or self._last_action is None:
+            self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        # Skip PickPlaceBase.reset and call the grandparent implementation directly.
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        # Re-read the simulator state after the reset and initialize the distance
+        # to the first waypoint for the freshly reset environments.
+        states = self.handler.get_states()
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta joint control, keeping the gripper closed."""
+        # Interpret the policy output as a scaled delta on the measured joint
+        # positions (not on the last commanded action), clamped to joint limits.
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Force both gripper joints closed throughout tracking.
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call the grandparent implementation directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        # Terminate any environment whose gripper drifted away from the object,
+        # i.e. the grasp was lost during tracking.
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        released = ~is_grasping
+
+        if released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana7.py b/roboverse_pack/tasks/pick_place/track_banana7.py
new file mode 100644
index 000000000..a74458808
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana7.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states: the object is already
+grasped, so the policy only needs to learn trajectory following.
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana7", "pick_place_track_banana7") +class PickPlaceTrackBanana7(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
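+        # As in the other track_banana variants, the state file path comes from
+        # track_banana.yaml; only the registered task names differ between these
+        # copied modules.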
+        # Walk upward from this file to find the RoboVerse root: the first
+        # ancestor directory that contains `roboverse_learn`.
+        root = Path(__file__).resolve()
+        while root != root.parent:
+            if (root / "roboverse_learn").exists():
+                roboverse_root = root
+                break
+            root = root.parent
+        else:
+            # The loop ran to the filesystem root without a break.
+            raise RuntimeError("Could not locate RoboVerse root directory")
+
+        # Construct the full path to the shared YAML config.
+        config_path = roboverse_root / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml"
+
+        with open(config_path) as f:
+            cfg = yaml.safe_load(f)
+
+        self.state_file_path = cfg["state_file_path"]
+        self._loaded_states = None
+
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self._device = device
+
+        self.object_grasped = None
+
+        super().__init__(scenario, device)
+
+        self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device)
+        self.reward_functions = [
+            self._reward_trajectory_tracking,
+            self._reward_rotation_tracking,
+        ]
+        self.reward_weights = [
+            1.0,
+            0.0,  # rotation_tracking weight is already applied inside the function
+        ]
+
+    def _prepare_states(self, states, env_ids):
+        """Override to disable randomization for the track task."""
+        return states
+
+    def _get_initial_states(self) -> list[dict] | None:
+        """Load initial states from the pkl file, caching them after the first load."""
+        if self._loaded_states is not None:
+            return self._loaded_states
+
+        states_list = load_states_from_pkl(self.state_file_path)
+
+        device = getattr(self, "_device", None) or getattr(self, "device", None)
+        if device is None:
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+        initial_states = []
+        robot_name = "franka"
+        for state_dict in states_list:
+            initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name)
+            initial_states.append(initial_state)
+
+        # Tile the loaded states so every environment receives one.
+        if len(initial_states) < self.num_envs:
+            k = self.num_envs // len(initial_states)
+            remainder = self.num_envs % len(initial_states)
+            initial_states = initial_states * k + initial_states[:remainder]
+
+        initial_states = initial_states[: self.num_envs]
+
+        # Default waypoint poses, used when a saved state lacks the markers.
+        default_positions = [
+            torch.tensor([0.610000, -0.280000, 0.150000], device=device),
+            torch.tensor([0.600000, -0.190000, 0.220000], device=device),
+            torch.tensor([0.560000, -0.110000, 0.360000], device=device),
+            torch.tensor([0.530000, 0.010000, 0.470000], device=device),
+            torch.tensor([0.510000, 0.130000, 0.460000], device=device),
+        ]
+        default_rotations = [
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device),
+            torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device),
+            torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device),
+        ]
+
+        for initial_state in initial_states:
+            if "objects" not in initial_state:
+                initial_state["objects"] = {}
+
+            for i in range(self.num_waypoints):
+                marker_name = f"traj_marker_{i}"
+                if marker_name not in initial_state["objects"] and i < len(default_positions):
+                    initial_state["objects"][marker_name] = {
+                        "pos": default_positions[i].clone(),
+                        "rot": default_rotations[i].clone(),
+                    }
+
+        self._loaded_states = initial_states
+        log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}")
+        return initial_states
+
+    def reset(self, env_ids=None):
+        """Reset the environment, keeping the object grasped."""
+        if env_ids is None or self._last_action is None:
+            self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        # Skip PickPlaceBase.reset and call the grandparent implementation directly.
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        # Re-read the simulator state after the reset and initialize the distance
+        # to the first waypoint for the freshly reset environments.
+        states = self.handler.get_states()
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta joint control, keeping the gripper closed."""
+        # Interpret the policy output as a scaled delta on the measured joint
+        # positions (not on the last commanded action), clamped to joint limits.
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Force both gripper joints closed throughout tracking.
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call the grandparent implementation directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        # Terminate any environment whose gripper drifted away from the object,
+        # i.e. the grasp was lost during tracking.
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        released = ~is_grasping
+
+        if released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
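
The `track_banana6` and `track_banana7` modules above are byte-for-byte copies of the same task, differing only in their registered names. A minimal sketch of a factory that could generate such variants from one shared class; this assumes `register_task` returns the class it decorates (as the decorator usage above implies), and `PickPlaceTrackBananaBase` is a hypothetical stand-in for the shared class body:

```python
# Hypothetical deduplication sketch (not part of this patch): generate the
# near-identical track_banana task variants from one shared base class
# instead of copy-pasting whole modules.
from metasim.task.registry import register_task
from roboverse_pack.tasks.pick_place.base import PickPlaceBase


class PickPlaceTrackBananaBase(PickPlaceBase):
    """Stand-in for the shared track_banana implementation."""


def make_track_banana_task(variant: int) -> type:
    """Build and register one track_banana variant under both task names."""
    # Create the subclass dynamically so only the names vary per variant.
    cls = type(f"PickPlaceTrackBanana{variant}", (PickPlaceTrackBananaBase,), {})
    # Mirrors the decorator usage above, e.g.
    #   @register_task("pick_place.track_banana6", "pick_place_track_banana6")
    # and assumes register_task returns the class it decorates.
    return register_task(f"pick_place.track_banana{variant}", f"pick_place_track_banana{variant}")(cls)


# Register variants 5-7, matching the modules added in this patch.
TRACK_BANANA_TASKS = {v: make_track_banana_task(v) for v in (5, 6, 7)}
```

Each generated class registers under both the dotted and underscored names, mirroring the decorators in this patch, so existing `task:` config entries would keep working.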