diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py
index cf0a38050..3f6252887 100644
--- a/metasim/sim/isaacgym/isaacgym.py
+++ b/metasim/sim/isaacgym/isaacgym.py
@@ -774,7 +774,11 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]:
         # Apply GS background rendering if enabled
         # TODO: Render with batch parallelization for efficiency
-        if self.scenario.gs_scene.with_gs_background and self.gs_background is not None:
+        if (
+            self.scenario.gs_scene is not None
+            and self.scenario.gs_scene.with_gs_background
+            and self.gs_background is not None
+        ):
             assert ROBO_SPLATTER_AVAILABLE, "RoboSplatter is not available. GS background rendering will be disabled."
             camera_states = self._apply_gs_background_rendering(camera_states, env_ids)
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml
new file mode 100644
index 000000000..a962f4aa4
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana2.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana2"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana2"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana2" # Unique run name
+model_dir: "models/banana2" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml
new file mode 100644
index 000000000..58ce46335
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana3.yaml
@@ -0,0 +1,90 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana3"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana3"
+use_wandb: false
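+# checkpoint_path of null is assumed to start training from scratch; point it at a saved checkpoint to resume.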
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana3" # Unique run name
+model_dir: "models/banana3" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml
new file mode 100644
index 000000000..600aa560d
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana4.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana4"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana4"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana4" # Unique run name
+model_dir: "models/banana4" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml
new file mode 100644
index 000000000..79980bdb5
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana5.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana5"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana5"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana5" # Unique run name
+model_dir: "models/banana5" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml
new file mode 100644
index 000000000..4e1d10e27
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana6.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana6"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana6"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana6" # Unique run name
+model_dir: "models/banana6" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml
new file mode 100644
index 000000000..f9ea92a86
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_banana7.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Banana
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_banana7"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_banana7"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_banana7" # Unique run name
+model_dir: "models/banana7" # Separate directory for banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml
new file mode 100644
index 000000000..4b92f2fc8
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool2.yaml
@@ -0,0 +1,89 @@
+# Base Configuration for FastTD3 Training - Cutting Tool
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_cuttingtool2"
+decimation: 4
+train_or_eval: "train"
+headless: False
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_cuttingtool2"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_cuttingtool2" # Unique run name
+model_dir: "models/cuttingtool2" # Separate directory for cuttingtool2 checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml
new file mode 100644
index 000000000..59597950b
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/pick_place_cuttingtool3.yaml
@@ -0,0 +1,90 @@
+# Base Configuration for FastTD3 Training - Cutting Tool
+# Default configuration with IsaacGym simulator and Franka robot
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.approach_grasp_simple_cuttingtool3"
+decimation: 4
+train_or_eval: "train"
+headless: False
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
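+# device_rank is assumed to be the CUDA device index used when cuda is true.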
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 600.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "get_started_fttd3"
+exp_name: "get_started_fttd3_cuttingtool3"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.approach_grasp_simple_cuttingtool3" # Unique run name
+model_dir: "models/cuttingtool3" # Separate directory for cuttingtool3 checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml
index f5b6f4d5b..ae77d0b0a 100644
--- a/roboverse_learn/rl/fast_td3/configs/track_banana.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/track_banana.yaml
@@ -10,7 +10,7 @@ robots: ["franka"]
 task: "pick_place.track_banana"
 decimation: 4
 train_or_eval: "train"
-headless: False
+headless: True
 
 # State file path for track task (pkl file path for grasp states)
 # If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
@@ -62,7 +62,7 @@ num_atoms: 101
 # Value Distribution & Exploration
 # -------------------------------------------------------------------------------
 v_min: 0
-v_max: 600.0
+v_max: 1500.0
 policy_noise: 0.001
 std_min: 0.001
 std_max: 0.4
diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml
new file mode 100644
index 000000000..27f64e9c7
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/track_banana2.yaml
@@ -0,0 +1,95 @@
+# Configuration for FastTD3 Training - Track Task (Banana)
+# Stage 3: Track task for banana object - for training trajectory tracking
+# Starts from saved grasp states from approach_grasp_simple_banana2, only trains trajectory tracking
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.track_banana2"
+decimation: 4
+train_or_eval: "train"
+headless: False
+
+# State file path for track task (pkl file path for grasp states)
+# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
+state_file_path: "eval_states/pick_place.approach_grasp_simple_banana2_franka_lift_states_103states_20251217_213149.pkl"
+
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 2000.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "pick_place_track_banana2"
+exp_name: "pick_place_track_banana2"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.track_banana2" # Unique run name for banana track task
+model_dir: "models/track_banana2" # Separate directory for track banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml b/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml
new file mode 100644
index 000000000..a0925b38f
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/track_banana3.yaml
@@ -0,0 +1,94 @@
+# Configuration for FastTD3 Training - Track Task (Banana)
+# Stage 3: Track task for banana object - for training trajectory tracking
+# Starts from saved grasp states from approach_grasp_simple_banana3, only trains trajectory tracking
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.track_banana3"
+decimation: 4
+train_or_eval: "train"
+headless: False
+
+# State file path for track task (pkl file path for grasp states)
+# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
+state_file_path: "eval_states/pick_place.approach_grasp_simple_banana3_franka_lift_states_103states_20251217_213149.pkl"
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 2000.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "pick_place_track_banana3"
+exp_name: "pick_place_track_banana3"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.track_banana3" # Unique run name for banana track task
+model_dir: "models/track_banana3" # Separate directory for track banana checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml
new file mode 100644
index 000000000..809538389
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool2.yaml
@@ -0,0 +1,95 @@
+# Configuration for FastTD3 Training - Track Task (Cutting Tool)
+# Stage 3: Track task for cutting tool object - for training trajectory tracking
+# Starts from saved grasp states from approach_grasp_simple_cuttingtool2, only trains trajectory tracking
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.track_cuttingtool2"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# State file path for track task (pkl file path for grasp states)
+# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
+state_file_path: "eval_states/pick_place.approach_grasp_cuttingtool2_franka_lift_states_100states_20251210_153518.pkl"
+
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 1500.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "pick_place_track_cuttingtool2"
+exp_name: "pick_place_track_cuttingtool2"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.track_cuttingtool2" # Unique run name for cuttingtool2 track task
+model_dir: "models/track_cuttingtool2" # Separate directory for track cuttingtool2 checkpoints
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml
new file mode 100644
index 000000000..e17e45a75
--- /dev/null
+++ b/roboverse_learn/rl/fast_td3/configs/track_cuttingtool3.yaml
@@ -0,0 +1,96 @@
+# Configuration for FastTD3 Training - Track Task (Cutting Tool)
+# Stage 3: Track task for cutting tool object - for training trajectory tracking
+# Starts from saved grasp states from approach_grasp_simple_cuttingtool3, only trains trajectory tracking
+
+# -------------------------------------------------------------------------------
+# Environment
+# -------------------------------------------------------------------------------
+sim: "isaacgym"
+robots: ["franka"]
+task: "pick_place.track_cuttingtool3"
+decimation: 4
+train_or_eval: "train"
+headless: True
+
+# State file path for track task (pkl file path for grasp states)
+# If null, uses default path or env var PICK_PLACE_TRACK_STATE_FILE
+state_file_path: "eval_states/pick_place.approach_grasp_cuttingtool3_franka_lift_states_100states_20251210_153518.pkl"
+
+
+# -------------------------------------------------------------------------------
+# Seeds & Device
+# -------------------------------------------------------------------------------
+seed: 1
+cuda: true
+torch_deterministic: true
+device_rank: 0
+
+# -------------------------------------------------------------------------------
+# Rollout & Timesteps
+# -------------------------------------------------------------------------------
+num_envs: 400
+num_eval_envs: 400
+total_timesteps: 2000000
+learning_starts: 10
+num_steps: 1
+
+# -------------------------------------------------------------------------------
+# Replay, Batching, Discounting
+# -------------------------------------------------------------------------------
+buffer_size: 20480
+batch_size: 32768
+gamma: 0.99
+tau: 0.1
+
+# -------------------------------------------------------------------------------
+# Update Schedule
+# -------------------------------------------------------------------------------
+policy_frequency: 2
+num_updates: 5
+# -------------------------------------------------------------------------------
+# Optimizer & Network
+# -------------------------------------------------------------------------------
+critic_learning_rate: 0.0003
+actor_learning_rate: 0.0003
+weight_decay: 0.1
+critic_hidden_dim: 512
+actor_hidden_dim: 256
+init_scale: 0.01
+num_atoms: 101
+
+# -------------------------------------------------------------------------------
+# Value Distribution & Exploration
+# -------------------------------------------------------------------------------
+v_min: 0
+v_max: 1500.0
+policy_noise: 0.001
+std_min: 0.001
+std_max: 0.4
+noise_clip: 0.5
+
+# -------------------------------------------------------------------------------
+# Algorithm Flags
+# -------------------------------------------------------------------------------
+use_cdq: true
+compile: true
+obs_normalization: true
+max_grad_norm: 0.0
+amp: true
+amp_dtype: "fp16"
+disable_bootstrap: false
+measure_burnin: 3
+
+# -------------------------------------------------------------------------------
+# Logging & Checkpointing
+# -------------------------------------------------------------------------------
+wandb_project: "pick_place_track_cuttingtool3"
+exp_name: "pick_place_track_cuttingtool3"
+use_wandb: false
+checkpoint_path: null
+run_name: "pick_place.track_cuttingtool3" # Unique run name for cuttingtool3 track task
+model_dir: "models/track_cuttingtool3" # Separate directory for track cuttingtool3 checkpoints
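+# eval_interval and save_interval are assumed to count training steps, consistent with the sibling configs.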
+eval_interval: 5000
+save_interval: 5000
+video_width: 1024
+video_height: 1024
diff --git a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml
index 0c3efbde9..8893290af 100644
--- a/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml
+++ b/roboverse_learn/rl/fast_td3/configs/track_screwdriver.yaml
@@ -62,7 +62,7 @@ num_atoms: 101
 # Value Distribution & Exploration
 # -------------------------------------------------------------------------------
 v_min: 0
-v_max: 600.0
+v_max: 2000.0
 policy_noise: 0.001
 std_min: 0.001
 std_max: 0.4
diff --git a/roboverse_learn/rl/fast_td3/evaluate.py b/roboverse_learn/rl/fast_td3/evaluate.py
index 0a2d984a4..f379ee73a 100644
--- a/roboverse_learn/rl/fast_td3/evaluate.py
+++ b/roboverse_learn/rl/fast_td3/evaluate.py
@@ -496,7 +496,7 @@ def main():
     parser.add_argument('--device_rank', type=int, default=0,
                         help='GPU device rank')
-    parser.add_argument('--num_envs', type=int, default=None,
-                        help='Number of parallel environments (default: from checkpoint config)')
+    parser.add_argument('--num_envs', type=int, default=5,
+                        help='Number of parallel environments (default: 5)')
     parser.add_argument('--headless', action='store_true',
                         help='Run in headless mode')
diff --git a/roboverse_learn/rl/fast_td3/evaluate_lift.py b/roboverse_learn/rl/fast_td3/evaluate_lift.py
index ba613cb31..405d7ba35 100644
--- a/roboverse_learn/rl/fast_td3/evaluate_lift.py
+++ b/roboverse_learn/rl/fast_td3/evaluate_lift.py
@@ -165,11 +165,13 @@ def evaluate_lift_collection(
 
     lift_start_state = {}
     lift_frame_count = {}
+    lift_capture_recorded = {}
     in_lift_phase = {}
     recording_traj = {}
 
     for i in range(num_eval_envs):
         lift_start_state[i] = None
         lift_frame_count[i] = 0
+        lift_capture_recorded[i] = False
         in_lift_phase[i] = False
         recording_traj[i] = False
@@ -245,8 +247,9 @@
 
                 if grasp_success and lift_active and not in_lift_phase[i]:
                     in_lift_phase[i] = True
-                    lift_start_state[i] = extract_state_dict(env, scenario, env_idx=i)
-                    lift_frame_count[i] = 1
+                    lift_start_state[i] = None
+                    lift_frame_count[i] = 0
+                    lift_capture_recorded[i] = False
                     recording_traj[i] = True
                     log.info(f"[Env {i}] Entered lift phase (grasp success and lift active)")
 
@@ -255,7 +258,12 @@
                     if lift_active and grasp_success:
                         lift_frame_count[i] += 1
 
-                        if lift_frame_count[i] >= lift_stable_frames:
+                        if (not lift_capture_recorded[i]) and lift_frame_count[i] >= lift_stable_frames:
+                            # Capture the state after holding grasp for lift_stable_frames frames
+                            lift_start_state[i] = extract_state_dict(env, scenario, env_idx=i)
+                            lift_capture_recorded[i] = True
+
+                        if lift_capture_recorded[i] and lift_frame_count[i] >= lift_stable_frames * 2:
                             traj_data = {
                                 "init_state": current_episode_init_state[i],
                                 "actions": current_episode_actions[i],
@@ -280,6 +288,7 @@
 
                             lift_start_state[i] = None
                             lift_frame_count[i] = 0
+                            lift_capture_recorded[i] = False
                             in_lift_phase[i] = False
                             recording_traj[i] = False
@@ -287,6 +296,7 @@
                             done_masks[i] = True
                     else:
                         lift_frame_count[i] = 0
+                        lift_capture_recorded[i] = False
                         if not grasp_success:
                             in_lift_phase[i] = False
                             recording_traj[i] = False
@@ -303,6 +313,7 @@
 
                 lift_start_state[i] = None
                 lift_frame_count[i] = 0
+                lift_capture_recorded[i] = False
                 in_lift_phase[i] = False
                 recording_traj[i] = False
                 current_episode_actions[i] = []
@@ -322,6 +333,7 @@
         for i in range(num_eval_envs):
             lift_start_state[i] = None
            lift_frame_count[i] = 0
+            lift_capture_recorded[i] = False
             in_lift_phase[i] = False
             recording_traj[i] = False
             current_episode_actions[i] = []
diff --git a/roboverse_learn/rl/fast_td3/train.py b/roboverse_learn/rl/fast_td3/train.py
index 5effe6e06..dfb03aaab 100644
--- a/roboverse_learn/rl/fast_td3/train.py
+++ b/roboverse_learn/rl/fast_td3/train.py
@@ -58,11 +58,11 @@ def get_config():
 
 torch.set_float32_matmul_precision("high")
 
-import inspect
 import numpy as np
 import torch
 import torch.nn.functional as F
 import tqdm
+import wandb
 from loguru import logger as log
 from tensordict import TensorDict
 from torch import optim
@@ -137,14 +137,11 @@ def main() -> None:
 
     scaler = GradScaler(enabled=amp_enabled and amp_dtype == torch.float16)
 
-    # Import wandb if enabled
-    if cfg("use_wandb"):
-        import wandb
-        if cfg("train_or_eval") == "train":
-            wandb.init(
-                project=cfg("wandb_project", "fttd3_training"),
-                save_code=True,
-            )
+    if cfg("use_wandb") and cfg("train_or_eval") == "train":
+        wandb.init(
+            project=cfg("wandb_project", "fttd3_training"),
+            save_code=True,
+        )
 
     random.seed(cfg("seed"))
     np.random.seed(cfg("seed"))
@@ -169,23 +166,9 @@ def main() -> None:
 
     scenario = task_cls.scenario.update(
         robots=cfg("robots"), simulator=cfg("sim"), num_envs=cfg("num_envs"), headless=cfg("headless"), cameras=[]
     )
-    # Check if task class accepts state_file_path parameter (only track tasks do)
-    init_signature = inspect.signature(task_cls.__init__)
-    accepts_state_file_path = "state_file_path" in init_signature.parameters
-
-    # Pass state_file_path from config if task accepts it (for track tasks)
-    state_file_path = cfg("state_file_path", None)
-    if accepts_state_file_path and state_file_path is not None:
-        envs = task_cls(scenario, device=device, state_file_path=state_file_path)
-    else:
-        envs = task_cls(scenario, device=device)
-    # Only use viser wrapper if not headless and viser is available
-    if not cfg("headless"):
-        try:
-            from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper
-            envs = TaskViserWrapper(envs)
-        except ImportError:
-            log.warning("Viser not available, skipping visualization wrapper")
+    envs = task_cls(scenario, device=device)
+    from metasim.utils.viser.viser_env_wrapper import TaskViserWrapper
+    envs = TaskViserWrapper(envs)
     eval_envs = envs
 
@@ -316,11 +299,7 @@ def render_with_rollout() -> list:
         scenario_render = scenario.update(
             robots=robots, simulator=simulator, num_envs=num_envs, headless=headless, cameras=cameras
         )
-        # Pass state_file_path from config if task accepts it (for track tasks)
-        if accepts_state_file_path and state_file_path is not None:
-            env = task_cls(scenario_render, device=device, state_file_path=state_file_path)
-        else:
-            env = task_cls(scenario_render, device=device)
+        env = task_cls(scenario_render, device=device)
 
         obs_normalizer.eval()
         obs, info = env.reset()
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
index b94c152a3..966d341ab 100644
--- a/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana.py
@@ -6,11 +6,7 @@
 
 from __future__ import annotations
 
-import importlib.util
-import os
-
 import torch
-from loguru import logger as log
 
 from metasim.constants import PhysicStateType
 from metasim.scenario.objects import RigidObjCfg
@@ -19,7 +15,11 @@
 from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple
 
 
-@register_task("pick_place.approach_grasp_simple_banana", "pick_place_approach_grasp_simple_banana")
+@register_task(
+    "pick_place.approach_grasp_simple_banana",
+    "pick_place_approach_grasp_simple_banana",
+    "pick_place.approach_grasp_banana",
+)
 class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple):
     """Simple Approach and Grasp task for banana object.
 
@@ -153,130 +153,98 @@ class PickPlaceApproachGraspSimpleBanana(PickPlaceApproachGraspSimple):
     max_episode_steps = 200
 
     def _get_initial_states(self) -> list[dict] | None:
-        """Get initial states for all environments.
-
-        Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers
-        from saved_poses_20251206_banana_basket.py.
-        """
-        # Add path to saved poses
-        saved_poses_path = os.path.join(
-            os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
-            "get_started",
-            "output",
-            "saved_poses_20251206_banana_basket.py",
-        )
-        if os.path.exists(saved_poses_path):
-            # Load saved poses dynamically
-            spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path)
-            saved_poses_module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(saved_poses_module)
-            saved_poses = saved_poses_module.poses
-        else:
-            # Fallback to default poses if saved file not found
-            log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses")
-            saved_poses = None
-
-        if saved_poses is not None:
-            # Use saved poses from object_layout.py
-            init = []
-            for _ in range(self.num_envs):
-                env_state = {
-                    "objects": {
-                        # Banana as the object to pick
-                        "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")),
-                        "table": saved_poses["objects"]["table"],
-                        "lamp": saved_poses["objects"].get("lamp"),
-                        "basket": saved_poses["objects"].get("basket"),
-                        "bowl": saved_poses["objects"].get("bowl"),
-                        "cutting_tools": saved_poses["objects"].get("cutting_tools"),
-                        "spoon": saved_poses["objects"].get("spoon"),
-                        # Include trajectory markers if present
-                        "traj_marker_0": saved_poses["objects"].get("traj_marker_0"),
-                        "traj_marker_1": saved_poses["objects"].get("traj_marker_1"),
-                        "traj_marker_2": saved_poses["objects"].get("traj_marker_2"),
-                        "traj_marker_3": saved_poses["objects"].get("traj_marker_3"),
-                        "traj_marker_4": saved_poses["objects"].get("traj_marker_4"),
-                    },
-                    "robots": {
-                        "franka": saved_poses["robots"]["franka"],
+        """Hardcoded initial states from saved_poses_20251206_banana_basket.py."""
+        saved_poses = {
+            "objects": {
+                "table": {
+                    "pos": torch.tensor([0.400000, -0.200000, 0.400000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "lamp": {
+                    "pos": torch.tensor([0.610000, 0.200000, 1.050000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "basket": {
+                    "pos": torch.tensor([0.610000, -0.300000, 0.825000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "bowl": {
+                    "pos": torch.tensor([0.350000, 0.250000, 0.863000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "cutting_tools": {
+                    "pos": torch.tensor([0.180000, -0.070000, 0.820000]),
+                    "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]),
+                },
+                "spoon": {
+                    "pos": torch.tensor([0.530000, -0.690000, 0.850000]),
+                    "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]),
+                },
+                "object": {
+                    "pos": torch.tensor([0.280000, -0.580000, 0.825000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "traj_marker_0": {
+                    "pos": torch.tensor([0.280000, -0.540000, 0.850000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "traj_marker_1": {
+                    "pos": torch.tensor([0.320000, -0.490000, 0.910000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "traj_marker_2": {
+                    "pos": torch.tensor([0.330000, -0.430000, 1.110000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+                "traj_marker_3": {
+                    "pos": torch.tensor([0.360000, -0.350000, 1.210000]),
+                    "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]),
+                },
+                "traj_marker_4": {
+                    "pos": torch.tensor([0.600000, -0.310000, 1.190000]),
+                    "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                },
+            },
+            "robots": {
+                "franka": {
+                    "pos": torch.tensor([0.800000, -0.800000, 0.780000]),
+                    "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813415]),
+                    "dof_pos": {
+                        "panda_finger_joint1": 0.040000,
+                        "panda_finger_joint2": 0.040000,
+                        "panda_joint1": 0.000000,
+                        "panda_joint2": -0.785398,
+                        "panda_joint3": 0.000000,
+                        "panda_joint4": -2.356194,
+                        "panda_joint5": 0.000000,
+                        "panda_joint6": 1.570796,
+                        "panda_joint7": 0.785398,
+                    },
-                    },
-                }
-            # Remove None values
-            env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None}
-            init.append(env_state)
-        else:
-            # Default poses (fallback) - using poses from original approach_grasp_banana.py
-            init = [
-                {
-                    "objects": {
-                        "table": {
-                            "pos": torch.tensor([0.400000, -0.200000, 0.400000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "lamp": {
-                            "pos": torch.tensor([0.610000, 0.200000, 1.050000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "basket": {
-                            "pos": torch.tensor([0.610000, -0.300000, 0.825000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "bowl": {
-                            "pos": torch.tensor([0.350000, 0.250000, 0.863000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "cutting_tools": {
-                            "pos": torch.tensor([0.180000, -0.070000, 0.820000]),
-                            "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]),
-                        },
-                        "spoon": {
-                            "pos": torch.tensor([0.530000, -0.690000, 0.850000]),
-                            "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]),
-                        },
-                        "object": {
-                            "pos": torch.tensor([0.280000, -0.580000, 0.825000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "traj_marker_0": {
-                            "pos": torch.tensor([0.280000, -0.540000, 0.850000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "traj_marker_1": {
-                            "pos": torch.tensor([0.320000, -0.490000, 0.910000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "traj_marker_2": {
-                            "pos": torch.tensor([0.330000, -0.430000, 1.110000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                        "traj_marker_3": {
-                            "pos": torch.tensor([0.360000, -0.350000, 1.210000]),
-                            "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]),
-                        },
-                        "traj_marker_4": {
-                            "pos": torch.tensor([0.600000, -0.310000, 1.190000]),
-                            "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
-                        },
-                    },
-                    "robots": {
-                        "franka": {
-                            "pos": torch.tensor([0.800000, -0.800000, 0.780000]),
-                            "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813415]),
-                            "dof_pos": {
-                                "panda_finger_joint1": 0.040000,
-                                "panda_finger_joint2": 0.040000,
-                                "panda_joint1": 0.000000,
-                                "panda_joint2": -0.785398,
-                                "panda_joint3": 0.000000,
-                                "panda_joint4": -2.356194,
-                                "panda_joint5": 0.000000,
-                                "panda_joint6": 1.570796,
-                                "panda_joint7": 0.785398,
-                            },
-                        },
-                    },
-                }
-                for _ in range(self.num_envs)
-            ]
+                },
+            },
+        }
+
+        init = []
+        for _ in range(self.num_envs):
+            env_state = {
+                "objects": {
+                    "object": saved_poses["objects"]["object"],
+                    "table": saved_poses["objects"]["table"],
+                    "lamp": saved_poses["objects"]["lamp"],
+                    "basket": saved_poses["objects"]["basket"],
+                    "bowl": saved_poses["objects"]["bowl"],
+                    "cutting_tools": saved_poses["objects"]["cutting_tools"],
+                    "spoon": saved_poses["objects"]["spoon"],
+                    "traj_marker_0": saved_poses["objects"]["traj_marker_0"],
+                    "traj_marker_1": saved_poses["objects"]["traj_marker_1"],
+                    "traj_marker_2": saved_poses["objects"]["traj_marker_2"],
+                    "traj_marker_3": saved_poses["objects"]["traj_marker_3"],
+                    "traj_marker_4": saved_poses["objects"]["traj_marker_4"],
+                },
+                "robots": {
+                    "franka": saved_poses["robots"]["franka"],
+                },
+            }
+            init.append(env_state)
 
         return init
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py
new file mode 100644
index 000000000..586cb93fd
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana2.py
@@ -0,0 +1,292 @@
+"""Stage 1: Simple Approach and Grasp task for banana object.
+
+This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object
+with specific mesh configurations and saved poses from object_layout.py.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import os
+
+import torch
+from loguru import logger as log
+
+from metasim.constants import PhysicStateType
+from metasim.scenario.objects import RigidObjCfg
+from metasim.scenario.scenario import ScenarioCfg, SimParamCfg
+from metasim.task.registry import register_task
+from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple
+
+
+@register_task(
+    "pick_place.approach_grasp_simple_banana2",
+    "pick_place_approach_grasp_simple_banana2",
+    "pick_place.approach_grasp_banana2",
+)
+class PickPlaceApproachGraspSimpleBanana2(PickPlaceApproachGraspSimple):
+    """Simple Approach and Grasp task for banana object.
+
+    This task inherits from PickPlaceApproachGraspSimple and customizes:
+    - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData
+    - Initial states: Loads poses from saved_poses_20251206_banana_basket.py
+    """
+
+    scenario = ScenarioCfg(
+        objects=[
+            # EmbodiedGen Assets - Put Banana in Mug Scene
+            RigidObjCfg(
+                name="table",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml",
+            ),
+            RigidObjCfg(
+                name="bowl",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml",
+            ),
+            RigidObjCfg(
+                name="cutting_tools",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml",
+            ),
+            RigidObjCfg(
+                name="screw_driver",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml",
+            ),
+            RigidObjCfg(
+                name="spoon",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml",
+            ),
+            RigidObjCfg(
+                name="object",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml",
+            ),
+            RigidObjCfg(
+                name="mug",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml",
+            ),
+            RigidObjCfg(
+                name="book",
+                scale=(1, 1, 1),
+                physics=PhysicStateType.RIGIDBODY,
+                usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd",
+                urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf",
+                mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml",
+            ),
+            RigidObjCfg(
+                name="traj_marker_0",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+            ),
+            RigidObjCfg(
+                name="traj_marker_1",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+            ),
+            RigidObjCfg(
+                name="traj_marker_2",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+            ),
+            RigidObjCfg(
+                name="traj_marker_3",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+            ),
+            RigidObjCfg(
+                name="traj_marker_4",
+                urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+                mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+                usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+                scale=0.2,
+                physics=PhysicStateType.XFORM,
+                enabled_gravity=False,
+                collision_enabled=False,
+            ),
+        ],
+        robots=["franka"],
+        sim_params=SimParamCfg(
+            dt=0.005,
+        ),
+        decimation=4,
+    )
+    max_episode_steps = 200
+
+    def _get_initial_states(self) -> list[dict] | None:
+        """Get initial states for all environments.
+
+        Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers
+        from saved_poses_20251206_banana_basket.py.
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.600000, 0.310000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.340000, 0.250000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "screw_driver": { + "pos": torch.tensor([0.190000, 0.100000, 0.811000]), + "rot": torch.tensor([-0.868588, -0.274057, -0.052298, 0.409518]), + }, + "spoon": { + "pos": torch.tensor([0.350000, 0.270000, 0.860000]), + "rot": torch.tensor([0.952675, -0.005654, 0.187418, 0.239269]), + }, + "object": { + "pos": torch.tensor([0.570000, -0.200000, 0.795000]), + "rot": torch.tensor([0.644470, -0.113948, 0.110352, 0.747993]), + }, + "mug": { + "pos": torch.tensor([0.310000, 0.070000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.210000, -0.480000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.580000, -0.220000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.540000, -0.230000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.440000, -0.310000, 0.970000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": 
torch.tensor([0.310000, -0.380000, 0.950000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.240000, -0.480000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py new file mode 100644 index 000000000..68ddfa0c2 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana3.py @@ -0,0 +1,281 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana3", + "pick_place.approach_grasp_simple_banana3", + "pick_place.approach_grasp_banana3", +) +class PickPlaceApproachGraspSimpleBanana3(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + 
enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": 
torch.tensor([0.140000, -0.090000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.240000, -0.630000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "mug": { + "pos": torch.tensor([0.540000, -0.710000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.620000, -0.120000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.320000, -0.340000, 0.811000]), + "rot": torch.tensor([0.911038, 0.000000, -0.000000, 0.412321]), + }, + "vase": { + "pos": torch.tensor([0.300000, 0.050000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.220000, -0.630000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.270000, -0.560000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.360000, -0.470000, 0.920000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.390000, 0.900000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.450000, -0.310000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.519098, -0.000000, -0.000001, 0.854714]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py new file mode 100644 index 000000000..19ec00d6a --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana4.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana4", + "pick_place.approach_grasp_simple_banana4", + "pick_place.approach_grasp_banana4", +) +class PickPlaceApproachGraspSimpleBanana4(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
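+
+        Note: the four nested ``os.path.dirname`` calls below walk up from this
+        file to the directory containing ``get_started/``; an equivalent
+        ``pathlib`` spelling (a sketch, not part of the original change)::
+
+            from pathlib import Path
+
+            saved_poses_path = str(
+                Path(__file__).resolve().parents[3]
+                / "get_started"
+                / "output"
+                / "saved_poses_20251206_banana_basket.py"
+            )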
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.280000, -0.580000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py 
b/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py new file mode 100644 index 000000000..277684e11 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana5.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana5", + "pick_place.approach_grasp_simple_banana5", + "pick_place.approach_grasp_banana5", +) +class PickPlaceApproachGraspSimpleBanana5(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + 
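# The traj_marker_* configs in this list differ only by name; a comprehension
+            # could generate them instead (a hypothetical refactor sketch, shown
+            # here as a comment so the original list is left intact):
+            #
+            #     *[
+            #         RigidObjCfg(
+            #             name=f"traj_marker_{i}",
+            #             urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+            #             mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml",
+            #             usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd",
+            #             scale=0.2,
+            #             physics=PhysicStateType.XFORM,
+            #             enabled_gravity=False,
+            #             collision_enabled=False,
+            #         )
+            #         for i in range(5)
+            #     ],
+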
RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.430000, 0.010000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": 
torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py b/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py new file mode 100644 index 000000000..dd0fb8179 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana6.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana6", + "pick_place.approach_grasp_simple_banana6", + "pick_place.approach_grasp_banana6", +) +class PickPlaceApproachGraspSimpleBanana6(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. 
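+
+        Note: every env_state appended below shares the same pose tensors from
+        ``saved_poses``, so all environments reference identical objects. If the
+        runtime ever mutates initial states in place, deep-copy per environment
+        instead (a sketch; ``template_state`` is a hypothetical name for the
+        dict built once outside the loop)::
+
+            import copy
+
+            init = [copy.deepcopy(template_state) for _ in range(self.num_envs)]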
+ """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.220000, 0.090000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.420000, 0.020000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.810000, -0.330000, 0.820000]), + "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + for _ in range(self.num_envs) + ] + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py 
b/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py new file mode 100644 index 000000000..834f7b596 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_banana7.py @@ -0,0 +1,221 @@ +"""Stage 1: Simple Approach and Grasp task for banana object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the banana object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import importlib.util +import os + +import torch +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_banana7", + "pick_place.approach_grasp_simple_banana7", + "pick_place.approach_grasp_banana7", +) +class PickPlaceApproachGraspSimpleBanana7(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for banana object. + + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses banana mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_banana_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + 
RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Get initial states for all environments. + + Uses saved poses from object_layout.py. Loads banana, table, basket, and trajectory markers + from saved_poses_20251206_banana_basket.py. + """ + # Add path to saved poses + saved_poses_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "get_started", + "output", + "saved_poses_20251206_banana_basket.py", + ) + if os.path.exists(saved_poses_path): + # Load saved poses dynamically + spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) + saved_poses_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(saved_poses_module) + saved_poses = saved_poses_module.poses + else: + # Fallback to default poses if saved file not found + log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") + saved_poses = None + + if saved_poses is not None: + # Use saved poses from object_layout.py + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + # Banana as the object to pick + "object": saved_poses["objects"].get("banana", saved_poses["objects"].get("object")), + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"].get("lamp"), + "basket": saved_poses["objects"].get("basket"), + "bowl": saved_poses["objects"].get("bowl"), + "cutting_tools": saved_poses["objects"].get("cutting_tools"), + "spoon": saved_poses["objects"].get("spoon"), + # Include trajectory markers if present + "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), + "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), + "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), + "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), + "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + # Remove None values + env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} + init.append(env_state) + else: + # Default poses (fallback) - using poses from original approach_grasp_banana.py + init = [ + { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.550000, -0.510000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.250000, -0.570000, 0.850000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.310000, -0.460000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.340000, -0.330000, 0.960000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.420000, -0.150000, 0.920000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": 
torch.tensor([0.420000, 0.020000, 0.870000]),
+                        "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]),
+                    },
+                },
+                "robots": {
+                    "franka": {
+                        "pos": torch.tensor([0.810000, -0.330000, 0.820000]),
+                        "rot": torch.tensor([0.215409, -0.102827, 0.002411, 0.971092]),
+                        "dof_pos": {
+                            "panda_finger_joint1": 0.040000,
+                            "panda_finger_joint2": 0.040000,
+                            "panda_joint1": 0.000000,
+                            "panda_joint2": -0.785398,
+                            "panda_joint3": 0.000000,
+                            "panda_joint4": -2.356194,
+                            "panda_joint5": 0.000000,
+                            "panda_joint6": 1.570796,
+                            "panda_joint7": 0.785398,
+                        },
+                    },
+                },
+            }
+            for _ in range(self.num_envs)
+        ]
+
+        return init
diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py
new file mode 100644
index 000000000..13a9e9480
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool2.py
@@ -0,0 +1,245 @@
+"""Stage 1: Simple Approach and Grasp task for cutting tool object.
+
+This task inherits from PickPlaceApproachGraspSimple and customizes it for the cutting tool object
+with specific mesh configurations and saved poses from object_layout.py.
+"""
+
+from __future__ import annotations
+
+import torch
+
+from metasim.constants import PhysicStateType
+from metasim.scenario.objects import RigidObjCfg
+from metasim.scenario.scenario import ScenarioCfg, SimParamCfg
+from metasim.task.registry import register_task
+from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple
+
+
+@register_task(
+    "pick_place.approach_grasp_simple_cuttingtool2",
+    "pick_place.approach_grasp_simple_cuttingtool2",
+    "pick_place.approach_grasp_cuttingtool2",
+)
+class PickPlaceApproachGraspSimpleCuttingTool2(PickPlaceApproachGraspSimple):
+    """Simple Approach and Grasp task for cutting tool object.
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses cutting tool mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_cuttingtool2_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", 
+ mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Hardcoded initial states from saved_poses_20251206_banana_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.530000, 0.180000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.570000, -0.330000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.220000, -0.020000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "mug": { + "pos": torch.tensor([0.500000, -0.710000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.220000, -0.520000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.260000, -0.270000, 0.811000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.510000, -0.290000, 0.830000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.480000, -0.340000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.360000, -0.410000, 0.930000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.250000, -0.470000, 0.930000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.230000, -0.520000, 0.890000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.640796, 0.019186, 0.016023, 0.767304]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + 
"panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py new file mode 100644 index 000000000..51e6149f9 --- /dev/null +++ b/roboverse_pack/tasks/pick_place/approach_grasp_cuttingtool3.py @@ -0,0 +1,245 @@ +"""Stage 1: Simple Approach and Grasp task for cutting tool object. + +This task inherits from PickPlaceApproachGraspSimple and customizes it for the cutting tool object +with specific mesh configurations and saved poses from object_layout.py. +""" + +from __future__ import annotations + +import torch + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple + + +@register_task( + "pick_place.approach_grasp_simple_cuttingtool3", + "pick_place.approach_grasp_simple_cuttingtool3", + "pick_place.approach_grasp_cuttingtool3", +) +class PickPlaceApproachGraspSimpleCuttingTool3(PickPlaceApproachGraspSimple): + """Simple Approach and Grasp task for cutting tool object. 
+ + This task inherits from PickPlaceApproachGraspSimple and customizes: + - Scenario: Uses cutting tool mesh, table mesh, and basket from EmbodiedGenData + - Initial states: Loads poses from saved_poses_20251206_cuttingtool3_basket.py + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Cutting Tool in Basket Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf",
+ mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def _get_initial_states(self) -> list[dict] | None: + """Hardcoded initial states from saved_poses_20251206_cuttingtool3_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.330000, 0.160000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "object": { + "pos": torch.tensor([0.320000, -0.760000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.580000, -0.690000, 0.850000]), + "rot": torch.tensor([0.961352, -0.120799, 0.030845, 0.245473]), + }, + "mug": { + "pos": torch.tensor([0.570000, 0.040000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.550000, -0.280000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "remote_control": { + "pos": torch.tensor([0.170000, -0.190000, 0.811000]), + "rot": torch.tensor([0.921060, 0.000000, -0.000000, 0.389418]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.230000, -0.680000, 0.830000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.240000, -0.620000, 0.870000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.290000, -0.560000, 0.920000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.340000, -0.460000, 0.930000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.550000, -0.290000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.640796, 0.019186, 0.016023, 0.767304]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, +
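# Joint targets in radians; this is the same Franka ready pose used by the sibling pick-place tasks +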
"panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, + }, + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) + + return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py index a5b8def81..e79fbd05d 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_screwdriver.py @@ -6,11 +6,7 @@ from __future__ import annotations -import importlib.util -import os - import torch -from loguru import logger as log from metasim.constants import PhysicStateType from metasim.scenario.objects import RigidObjCfg @@ -19,7 +15,11 @@ from roboverse_pack.tasks.pick_place.approach_grasp import PickPlaceApproachGraspSimple -@register_task("pick_place.approach_grasp_simple_screwdriver", "pick_place_approach_grasp_simple_screwdriver") +@register_task( + "pick_place.approach_grasp_simple_screwdriver", + "pick_place.approach_grasp_screwdriver", + "pick_place_approach_grasp_simple_screwdriver", +) class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): """Simple Approach and Grasp task for screwdriver object. @@ -153,133 +153,98 @@ class PickPlaceApproachGraspSimpleScrewDriver(PickPlaceApproachGraspSimple): max_episode_steps = 200 def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from object_layout.py. Loads screwdriver, table, basket, and trajectory markers - from saved_poses_20251206_screwdriver_basket.py. 
- """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251206_screwdriver_basket.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from object_layout.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Screwdriver as the object to pick - "object": saved_poses["objects"].get( - "screwdriver", - saved_poses["objects"].get("screw_driver", saved_poses["objects"].get("object")), - ), - "table": saved_poses["objects"]["table"], - "lamp": saved_poses["objects"].get("lamp"), - "basket": saved_poses["objects"].get("basket"), - "bowl": saved_poses["objects"].get("bowl"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "spoon": saved_poses["objects"].get("spoon"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251206_screwdriver_basket.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.400000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "lamp": { + "pos": torch.tensor([0.680000, 0.310000, 1.050000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.240000, -0.440000, 0.925000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "bowl": { + "pos": torch.tensor([0.620000, -0.080000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.230000, 0.090000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "object": { + "pos": torch.tensor([0.590000, -0.360000, 0.811000]), + "rot": torch.tensor([-0.824810, -0.390085, -0.023230, 0.408623]), + }, + "spoon": { + "pos": torch.tensor([0.470000, -0.710000, 0.830000]), + "rot": torch.tensor([0.928125, -0.061844, -0.058501, -0.362397]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.600000, -0.330000, 0.840000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.560000, -0.400000, 1.010000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.530000, -0.400000, 1.140000]), + "rot": torch.tensor([0.962425, -0.271547, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.430000, -0.340000, 1.160000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.260000, -0.400000, 1.170000]), + "rot": 
torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.800000, -0.800000, 0.780000]), + "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813414]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from original approach_grasp_screwdriver.py - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.400000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "lamp": { - "pos": torch.tensor([0.680000, 0.310000, 1.050000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.240000, -0.440000, 0.925000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "bowl": { - "pos": torch.tensor([0.620000, -0.080000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.230000, 0.090000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), - }, - "object": { - "pos": torch.tensor([0.590000, -0.360000, 0.811000]), - "rot": torch.tensor([-0.824810, -0.390085, -0.023230, 0.408623]), - }, - "spoon": { - "pos": torch.tensor([0.470000, -0.710000, 0.830000]), - "rot": torch.tensor([0.928125, -0.061844, -0.058501, -0.362397]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.600000, -0.330000, 0.840000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.560000, -0.400000, 1.010000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.530000, -0.400000, 1.140000]), - "rot": torch.tensor([0.962425, -0.271547, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.430000, -0.340000, 1.160000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.260000, -0.400000, 1.170000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.800000, -0.800000, 0.780000]), - "rot": torch.tensor([0.581682, -0.000000, -0.000001, 0.813414]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["object"], + "table": saved_poses["objects"]["table"], + "lamp": saved_poses["objects"]["lamp"], + "basket": saved_poses["objects"]["basket"], + "bowl": saved_poses["objects"]["bowl"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "spoon": saved_poses["objects"]["spoon"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": 
saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init diff --git a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py index ee9f42ced..b83d5f63d 100644 --- a/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py +++ b/roboverse_pack/tasks/pick_place/approach_grasp_spoon.py @@ -6,11 +6,7 @@ from __future__ import annotations -import importlib.util -import os - import torch -from loguru import logger as log from metasim.constants import PhysicStateType from metasim.scenario.objects import RigidObjCfg @@ -153,130 +149,99 @@ class PickPlaceApproachGraspSimpleSpoon(PickPlaceApproachGraspSimple): max_episode_steps = 200 def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from saved_poses_20251212_spoon.py. Loads spoon, table, basket, and trajectory markers. - """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251212_spoon.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from saved_poses_20251212_spoon.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Spoon as the object to pick - "object": saved_poses["objects"].get("spoon"), - "table": saved_poses["objects"].get("table"), - "basket": saved_poses["objects"].get("basket"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "mug": saved_poses["objects"].get("mug"), - "book": saved_poses["objects"].get("book"), - "vase": saved_poses["objects"].get("vase"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251212_spoon.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.210000, 0.250000, 0.825000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.560000, -0.730000, 0.820000]), + "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), + }, + "spoon": { + "pos": torch.tensor([0.200000, -0.590000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), + }, + "mug": { + "pos": torch.tensor([0.690000, -0.550000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 
0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.700000, 0.280000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.050000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.220000, -0.550000, 0.880000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.220000, -0.310000, 0.900000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.210000, -0.250000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.210000, 0.040000, 1.250000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.190000, 0.250000, 1.010000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.560000, -0.230001, 0.800000]), + "rot": torch.tensor([0.120502, -0.000001, -0.000001, 0.992712]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from saved_poses_20251212_spoon.py as hardcoded values - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.440000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.210000, 0.250000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.560000, -0.730000, 0.820000]), - "rot": torch.tensor([0.930507, 0.000000, -0.000000, 0.366273]), - }, - "object": { - "pos": torch.tensor([0.200000, -0.590000, 0.820000]), - "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), - }, - "mug": { - "pos": torch.tensor([0.690000, -0.550000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "book": { - "pos": torch.tensor([0.700000, 0.280000, 0.820000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "vase": { - "pos": torch.tensor([0.680000, 0.050000, 0.950000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.220000, -0.550000, 0.880000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.220000, -0.310000, 0.900000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.210000, -0.250000, 1.080000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.210000, 0.040000, 1.250000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.190000, 0.250000, 1.010000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.560000, -0.230001, 
0.800000]), - "rot": torch.tensor([0.120502, -0.000001, -0.000001, 0.992712]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["spoon"], + "table": saved_poses["objects"]["table"], + "basket": saved_poses["objects"]["basket"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "mug": saved_poses["objects"]["mug"], + "book": saved_poses["objects"]["book"], + "vase": saved_poses["objects"]["vase"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init @@ -290,129 +255,98 @@ class PickPlaceApproachGraspSimpleSpoon2(PickPlaceApproachGraspSimpleSpoon): """ def _get_initial_states(self) -> list[dict] | None: - """Get initial states for all environments. - - Uses saved poses from saved_poses_20251212_spoon2.py. Loads spoon, table, basket, and trajectory markers. - """ - # Add path to saved poses - saved_poses_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), - "get_started", - "output", - "saved_poses_20251212_spoon2.py", - ) - if os.path.exists(saved_poses_path): - # Load saved poses dynamically - spec = importlib.util.spec_from_file_location("saved_poses", saved_poses_path) - saved_poses_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(saved_poses_module) - saved_poses = saved_poses_module.poses - else: - # Fallback to default poses if saved file not found - log.warning(f"Saved poses file not found at {saved_poses_path}, using default poses") - saved_poses = None - - if saved_poses is not None: - # Use saved poses from saved_poses_20251212_spoon2.py - init = [] - for _ in range(self.num_envs): - env_state = { - "objects": { - # Spoon as the object to pick - "object": saved_poses["objects"].get("spoon"), - "table": saved_poses["objects"].get("table"), - "basket": saved_poses["objects"].get("basket"), - "cutting_tools": saved_poses["objects"].get("cutting_tools"), - "mug": saved_poses["objects"].get("mug"), - "book": saved_poses["objects"].get("book"), - "vase": saved_poses["objects"].get("vase"), - # Include trajectory markers if present - "traj_marker_0": saved_poses["objects"].get("traj_marker_0"), - "traj_marker_1": saved_poses["objects"].get("traj_marker_1"), - "traj_marker_2": saved_poses["objects"].get("traj_marker_2"), - "traj_marker_3": saved_poses["objects"].get("traj_marker_3"), - "traj_marker_4": saved_poses["objects"].get("traj_marker_4"), - }, - "robots": { - "franka": saved_poses["robots"]["franka"], + """Hardcoded initial states from saved_poses_20251212_spoon2.py.""" + saved_poses = { + "objects": { + "table": { + "pos": torch.tensor([0.440000, -0.200000, 0.400000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "basket": { + "pos": torch.tensor([0.640000, -0.620000, 0.825000]), + "rot": torch.tensor([1.000000, 
0.000000, 0.000000, 0.000000]), + }, + "cutting_tools": { + "pos": torch.tensor([0.290000, 0.370000, 0.820000]), + "rot": torch.tensor([0.640996, -0.000000, -0.000000, 0.767544]), + }, + "spoon": { + "pos": torch.tensor([0.140000, -0.370000, 0.820000]), + "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), + }, + "mug": { + "pos": torch.tensor([0.520000, 0.250000, 0.863000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "book": { + "pos": torch.tensor([0.240000, 0.160000, 0.820000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "vase": { + "pos": torch.tensor([0.680000, 0.270000, 0.950000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_0": { + "pos": torch.tensor([0.150000, -0.350000, 0.860000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_1": { + "pos": torch.tensor([0.240000, -0.460000, 0.940000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_2": { + "pos": torch.tensor([0.270000, -0.610000, 1.080000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + "traj_marker_3": { + "pos": torch.tensor([0.380000, -0.640000, 1.310000]), + "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), + }, + "traj_marker_4": { + "pos": torch.tensor([0.670000, -0.620000, 1.020000]), + "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), + }, + }, + "robots": { + "franka": { + "pos": torch.tensor([0.550000, -0.120000, 0.800000]), + "rot": torch.tensor([-0.393287, -0.000001, -0.000000, 0.919414]), + "dof_pos": { + "panda_finger_joint1": 0.040000, + "panda_finger_joint2": 0.040000, + "panda_joint1": 0.000000, + "panda_joint2": -0.785398, + "panda_joint3": 0.000000, + "panda_joint4": -2.356194, + "panda_joint5": 0.000000, + "panda_joint6": 1.570796, + "panda_joint7": 0.785398, }, - } - # Remove None values - env_state["objects"] = {k: v for k, v in env_state["objects"].items() if v is not None} - init.append(env_state) - else: - # Default poses (fallback) - using poses from saved_poses_20251212_spoon2.py as hardcoded values - init = [ - { - "objects": { - "table": { - "pos": torch.tensor([0.440000, -0.200000, 0.400000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "basket": { - "pos": torch.tensor([0.640000, -0.620000, 0.825000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "cutting_tools": { - "pos": torch.tensor([0.290000, 0.370000, 0.820000]), - "rot": torch.tensor([0.640996, -0.000000, -0.000000, 0.767544]), - }, - "object": { - "pos": torch.tensor([0.140000, -0.370000, 0.820000]), - "rot": torch.tensor([-0.702982, 0.088334, -0.087982, -0.700189]), - }, - "mug": { - "pos": torch.tensor([0.520000, 0.250000, 0.863000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "book": { - "pos": torch.tensor([0.240000, 0.160000, 0.820000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "vase": { - "pos": torch.tensor([0.680000, 0.270000, 0.950000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_0": { - "pos": torch.tensor([0.150000, -0.350000, 0.860000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_1": { - "pos": torch.tensor([0.240000, -0.460000, 0.940000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_2": { - "pos": torch.tensor([0.270000, -0.610000, 1.080000]), - "rot": 
torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - "traj_marker_3": { - "pos": torch.tensor([0.380000, -0.640000, 1.310000]), - "rot": torch.tensor([0.601833, 0.798621, 0.000000, -0.000000]), - }, - "traj_marker_4": { - "pos": torch.tensor([0.670000, -0.620000, 1.020000]), - "rot": torch.tensor([1.000000, 0.000000, 0.000000, 0.000000]), - }, - }, - "robots": { - "franka": { - "pos": torch.tensor([0.550000, -0.120000, 0.800000]), - "rot": torch.tensor([-0.393287, -0.000001, -0.000000, 0.919414]), - "dof_pos": { - "panda_finger_joint1": 0.040000, - "panda_finger_joint2": 0.040000, - "panda_joint1": 0.000000, - "panda_joint2": -0.785398, - "panda_joint3": 0.000000, - "panda_joint4": -2.356194, - "panda_joint5": 0.000000, - "panda_joint6": 1.570796, - "panda_joint7": 0.785398, - }, - }, - }, - } - for _ in range(self.num_envs) - ] + }, + }, + } + + init = [] + for _ in range(self.num_envs): + env_state = { + "objects": { + "object": saved_poses["objects"]["spoon"], + "table": saved_poses["objects"]["table"], + "basket": saved_poses["objects"]["basket"], + "cutting_tools": saved_poses["objects"]["cutting_tools"], + "mug": saved_poses["objects"]["mug"], + "book": saved_poses["objects"]["book"], + "vase": saved_poses["objects"]["vase"], + "traj_marker_0": saved_poses["objects"]["traj_marker_0"], + "traj_marker_1": saved_poses["objects"]["traj_marker_1"], + "traj_marker_2": saved_poses["objects"]["traj_marker_2"], + "traj_marker_3": saved_poses["objects"]["traj_marker_3"], + "traj_marker_4": saved_poses["objects"]["traj_marker_4"], + }, + "robots": { + "franka": saved_poses["robots"]["franka"], + }, + } + init.append(env_state) return init diff --git a/roboverse_pack/tasks/pick_place/base.py b/roboverse_pack/tasks/pick_place/base.py index ba472f47f..46e41be62 100644 --- a/roboverse_pack/tasks/pick_place/base.py +++ b/roboverse_pack/tasks/pick_place/base.py @@ -336,21 +336,6 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: target_pos = self.waypoint_positions[self.current_waypoint_idx] distance = torch.norm(ee_pos - target_pos, dim=-1) - # Progress reward: reward for reducing distance to waypoint - # This encourages the agent to make progress toward the current waypoint - # Only reward progress if we haven't reached this waypoint yet - not_already_reached = ~self.waypoints_reached[ - torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx - ] - distance_reduction = self.prev_distance_to_waypoint - distance - # Scale progress reward: 0.1 means progress reward is 10% of waypoint reached bonus - # This provides continuous guidance without overwhelming the sparse reward - progress_reward_component = ( - torch.clamp(distance_reduction * self.w_tracking_progress * 0.1, min=0.0) - * not_already_reached.float() - * grasped_mask.float() - ) - # Distance-based reward (far + near) / 2 distance_reward_far = 1 - torch.tanh(1.0 * distance) distance_reward_near = 1 - torch.tanh(10.0 * distance) @@ -373,13 +358,10 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: # Both distance and rotation must be satisfied to consider as reached reached = distance_reached & rotation_reached - newly_reached = reached & not_already_reached - # Waypoint reached bonus (one-time large reward when reaching a waypoint) - waypoint_reached_bonus = newly_reached.float() * self.w_tracking_progress - - # Update prev_distance for next step (only if not newly reached, - # because if newly reached, we'll update it when advancing to next waypoint) - 
self.prev_distance_to_waypoint[~newly_reached] = distance[~newly_reached] + newly_reached = reached & ( + ~self.waypoints_reached[torch.arange(self.num_envs, device=self.device), self.current_waypoint_idx] + ) + progress_reward = newly_reached.float() * self.w_tracking_progress if newly_reached.any(): if newly_reached[0]: @@ -423,10 +405,7 @@ def _reward_trajectory_tracking(self, env_states) -> torch.Tensor: (1 - torch.tanh(1.0 * distance_to_last[completed_mask])) * self.w_tracking_approach, ) - # Combine all reward components: approach reward + progress reward + waypoint reached bonus - tracking_reward = torch.where( - all_reached, maintain_reward, approach_reward + progress_reward_component + waypoint_reached_bonus - ) + tracking_reward = torch.where(all_reached, maintain_reward, approach_reward + progress_reward) return tracking_reward diff --git a/roboverse_pack/tasks/pick_place/track_banana.py b/roboverse_pack/tasks/pick_place/track_banana.py index ea36a8e83..feaf3331c 100644 --- a/roboverse_pack/tasks/pick_place/track_banana.py +++ b/roboverse_pack/tasks/pick_place/track_banana.py @@ -129,7 +129,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ "tracking_approach": 4.0, "tracking_progress": 150.0, - "rotation_tracking": 2.0, + "rotation_tracking": 0.0, }) # Remove unused rewards DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) @@ -182,6 +182,15 @@ class PickPlaceTrackBanana(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual banana mesh from EmbodiedGenData (matches object_layout.py) RigidObjCfg( name="object", scale=(1, 1, 1), @@ -190,14 +199,6 @@ class PickPlaceTrackBanana(PickPlaceBase): urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", ), - RigidObjCfg( - name="screw_driver", - scale=(1.5, 1.5, 1.5), - physics=PhysicStateType.RIGIDBODY, - usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/usd/ae51f060e3455e9f84a4fec81cc9284b.usd", - urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/ae51f060e3455e9f84a4fec81cc9284b.urdf", - mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/screwdriver/1/mjcf/ae51f060e3455e9f84a4fec81cc9284b.xml", - ), RigidObjCfg( name="spoon", scale=(1, 1, 1), @@ -306,7 +307,7 @@ def __init__(self, scenario, device=None): ] self.reward_weights = [ 1.0, - 1.0, # rotation_tracking weight is already applied inside the function + 0.0, # rotation_tracking weight is already applied inside the function ] def _prepare_states(self, states, env_ids): diff --git a/roboverse_pack/tasks/pick_place/track_banana2.py b/roboverse_pack/tasks/pick_place/track_banana2.py new file mode 100644 index 000000000..de3fa5da4 --- /dev/null +++ 
b/roboverse_pack/tasks/pick_place/track_banana2.py @@ -0,0 +1,450 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following. +""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + 
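# Articulated entries (e.g., the robot) also carry joint positions alongside the root pose +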
entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana2", "pick_place_track_banana2") +class PickPlaceTrackBanana2(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="lamp", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/usd/0a4489b1a2875c82a580f8b62d346e08.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/0a4489b1a2875c82a580f8b62d346e08.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/lighting_fixtures/1/mjcf/0a4489b1a2875c82a580f8b62d346e08.xml", + ), + RigidObjCfg( + name="basket", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="cutting_tools", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml", + ), + # Use actual banana mesh from EmbodiedGenData (matches object_layout.py) + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + 
urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="spoon", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml", + ), + # Visualization: Trajectory waypoints (5 spheres showing trajectory path) + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + fix_base_link=True, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + 
self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) 
+ else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos + delta_actions = actions * self._action_scale + new_actions = current_joint_pos + delta_actions + real_actions = torch.clamp(new_actions, self._action_low, self._action_high) + + # Keep both finger joints commanded shut so the grasped object is held throughout tracking + gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype) + real_actions[:, 0] = gripper_value_closed + real_actions[:, 1] = gripper_value_closed + + obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions) + self._last_action = real_actions + + updated_states = self.handler.get_states(mode="tensor") + box_pos = updated_states.objects["object"].root_state[:, 0:3] + gripper_pos, _ = self._get_ee_state(updated_states) + gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1) + is_grasping = gripper_box_dist < self.grasp_check_distance + + self.object_grasped = is_grasping + newly_released = ~is_grasping + + if newly_released[0]: + log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m") + + # Terminate any env where the object slipped out of the gripper + terminated = terminated | newly_released + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, reward, terminated, time_out, info diff --git a/roboverse_pack/tasks/pick_place/track_banana3.py b/roboverse_pack/tasks/pick_place/track_banana3.py new file mode 100644 index 000000000..bece9763c --- /dev/null +++ b/roboverse_pack/tasks/pick_place/track_banana3.py @@ -0,0 +1,445 @@ +"""Stage 3: Track task for trajectory tracking. + +Trains trajectory tracking from saved grasp states. +Object is already grasped, only needs to learn trajectory following.
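+Initial states are read from a pkl file; the path comes from state_file_path in track_banana.yaml.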
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana3", "pick_place_track_banana3") +class PickPlaceTrackBanana3(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="bowl", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/usd/0f296af3df66565c9e1a7c2bc7b35d72.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/0f296af3df66565c9e1a7c2bc7b35d72.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/bowl/1/mjcf/0f296af3df66565c9e1a7c2bc7b35d72.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + RigidObjCfg( + name="mug", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml", + ), + RigidObjCfg( + name="book", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml", + ), + RigidObjCfg( + name="remote_control", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + RigidObjCfg( + name="vase", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/usd/vase.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/result/vase.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/vase/mjcf/vase.xml", + ), + # 
Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + 
initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos + delta_actions = actions * self._action_scale + new_actions = current_joint_pos + delta_actions + real_actions = 
torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Alternative (kept for reference): accumulate deltas on the last
+        # commanded action instead of the measured joint positions.
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call its parent's step directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        # Treat any env whose gripper-object distance exceeds the threshold as released.
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana4.py b/roboverse_pack/tasks/pick_place/track_banana4.py
new file mode 100644
index 000000000..6531b386e
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana4.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
+"""
+
+from __future__ import annotations
+
+import os
+import pickle
+from copy import deepcopy
+from pathlib import Path
+
+import numpy as np
+import torch
+import yaml
+from loguru import logger as log
+
+from metasim.constants import PhysicStateType
+from metasim.scenario.objects import RigidObjCfg
+from metasim.scenario.scenario import ScenarioCfg, SimParamCfg
+from metasim.task.registry import register_task
+from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase
+
+
+def load_states_from_pkl(pkl_path: str):
+    """Load state list from pkl file."""
+    if not os.path.exists(pkl_path):
+        raise FileNotFoundError(f"State file not found: {pkl_path}")
+
+    with open(pkl_path, "rb") as f:
+        states_list = pickle.load(f)
+
+    log.info(f"Loaded {len(states_list)} states from {pkl_path}")
+    return states_list
+
+
+def _to_tensor(value, device: torch.device):
+    """Convert list/tuple/ndarray/tensor entries to float32 tensors on `device`."""
+    if isinstance(value, (list, tuple, np.ndarray)):
+        return torch.tensor(value, device=device, dtype=torch.float32)
+    if isinstance(value, torch.Tensor):
+        return value.to(device).float()
+    return value
+
+
+def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict:
+    """Convert state dict to initial state format."""
+    initial_state = {
+        "objects": {},
+        "robots": {},
+    }
+
+    if "objects" in state_dict and "robots" in state_dict:
+        # Nested format: {"objects": {...}, "robots": {...}}
+        for obj_name, obj_state in state_dict["objects"].items():
+            initial_state["objects"][obj_name] = {
+                "pos": _to_tensor(obj_state.get("pos"), device),
+                "rot": _to_tensor(obj_state.get("rot"), device),
+            }
+
+            if "dof_pos" in obj_state:
+                initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"]
+
+        for robot_name_key, robot_state in state_dict["robots"].items():
+            initial_state["robots"][robot_name_key] = {
+                "pos": _to_tensor(robot_state.get("pos"), device),
+                "rot": _to_tensor(robot_state.get("rot"), device),
+            }
+
+            if "dof_pos" in robot_state:
+                initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"]
+    else:
+        # Flat format: {entity_name: {...}}; route the robot entry by name.
+        for name, entity_state in state_dict.items():
+            if name in ["objects", "robots"]:
+                continue
+
+            entity_entry = {
+                "pos": _to_tensor(entity_state.get("pos"), device),
+                "rot": _to_tensor(entity_state.get("rot"), device),
+            }
+
+            if "dof_pos" in entity_state:
+                entity_entry["dof_pos"] = entity_state["dof_pos"]
+
+            if name == robot_name:
+                initial_state["robots"][name] = entity_entry
+            else:
+                initial_state["objects"][name] = entity_entry
+
+    return initial_state
+
+
+DEFAULT_CONFIG_TRACK = deepcopy(DEFAULT_CONFIG)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+    "tracking_approach": 4.0,
+    "tracking_progress": 150.0,
+    "rotation_tracking": 0.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+
+
+@register_task("pick_place.track_banana4", "pick_place_track_banana4")
+class PickPlaceTrackBanana4(PickPlaceBase):
+    """Trajectory tracking task from grasp states.
+
+    Assumes object is already grasped, only learns trajectory following.
+    Initial states loaded from pkl file.
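+
+    Delta-action sketch (illustrative, mirrors ``step``)::
+
+        real_actions = clamp(joint_pos + actions * _action_scale, _action_low, _action_high)
+        real_actions[:, 0] = real_actions[:, 1] = 0.0  # gripper stays closed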
+ """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + 
self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = 
list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Alternative (kept for reference): accumulate deltas on the last
+        # commanded action instead of the measured joint positions.
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call its parent's step directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        # Treat any env whose gripper-object distance exceeds the threshold as released.
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana5.py b/roboverse_pack/tasks/pick_place/track_banana5.py
new file mode 100644
index 000000000..4127a2a80
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana5.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
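+
+Initial-state replication (worked example): with 3 saved states and
+num_envs = 8, k = 8 // 3 = 2 and remainder = 2, so environments receive
+[s0, s1, s2, s0, s1, s2, s0, s1].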
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana5", "pick_place_track_banana5") +class PickPlaceTrackBanana5(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
+ # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + 
self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        states = self.handler.get_states()
+        if env_ids is None:
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Alternative (kept for reference): accumulate deltas on the last
+        # commanded action instead of the measured joint positions.
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call its parent's step directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        # Treat any env whose gripper-object distance exceeds the threshold as released.
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana6.py b/roboverse_pack/tasks/pick_place/track_banana6.py
new file mode 100644
index 000000000..31586b126
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana6.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
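+
+Episodes terminate early when the gripper-to-object distance exceeds
+``grasp_check_distance``, i.e. the object has slipped out of the gripper.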
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana6", "pick_place_track_banana6") +class PickPlaceTrackBanana6(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
+ # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + 
self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+        else:
+            self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+        if env_ids is None:
+            env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_tensor = (
+                torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+            )
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        self.current_waypoint_idx[env_ids_tensor] = 0
+        self.waypoints_reached[env_ids_tensor] = False
+        self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+        self.object_grasped[env_ids_tensor] = True
+
+        obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+        states = self.handler.get_states()
+        if env_ids is None:
+            env_ids_list = list(range(self.num_envs))
+        else:
+            env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+        ee_pos, _ = self._get_ee_state(states)
+        target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+        self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, info
+
+    def step(self, actions):
+        """Step with delta control, keeping gripper closed."""
+        current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+        delta_actions = actions * self._action_scale
+        new_actions = current_joint_pos + delta_actions
+        real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+        # Alternative (kept for reference): accumulate deltas on the last
+        # commanded action instead of the measured joint positions.
+        # delta_actions = actions * self._action_scale
+        # new_actions = self._last_action + delta_actions
+
+        gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+        real_actions[:, 0] = gripper_value_closed
+        real_actions[:, 1] = gripper_value_closed
+
+        # Skip PickPlaceBase.step and call its parent's step directly.
+        obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+        self._last_action = real_actions
+
+        updated_states = self.handler.get_states(mode="tensor")
+        box_pos = updated_states.objects["object"].root_state[:, 0:3]
+        gripper_pos, _ = self._get_ee_state(updated_states)
+        gripper_box_dist = torch.norm(gripper_pos - box_pos, dim=-1)
+        is_grasping = gripper_box_dist < self.grasp_check_distance
+
+        self.object_grasped = is_grasping
+        # Treat any env whose gripper-object distance exceeds the threshold as released.
+        newly_released = ~is_grasping
+
+        if newly_released[0]:
+            log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_box_dist[0].item():.4f}m")
+
+        terminated = terminated | newly_released
+
+        info["grasp_success"] = self.object_grasped
+        info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+        return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_banana7.py b/roboverse_pack/tasks/pick_place/track_banana7.py
new file mode 100644
index 000000000..a74458808
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_banana7.py
@@ -0,0 +1,405 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states.
+The object is already grasped; the policy only needs to learn trajectory following.
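+
+Expected pkl payload (illustrative sketch; exact keys depend on the grasp
+stage that saved the states)::
+
+    [
+        {
+            "objects": {"object": {"pos": [...], "rot": [...]}},
+            "robots": {"franka": {"pos": [...], "rot": [...], "dof_pos": {...}}},
+        },
+        ...
+    ]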
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({ + "tracking_approach": 4.0, + "tracking_progress": 150.0, + "rotation_tracking": 0.0, +}) +# Remove unused rewards +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None) +DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None) +# Disable randomization for exact state reproduction +DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0 +DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0 + + +@register_task("pick_place.track_banana7", "pick_place_track_banana7") +class PickPlaceTrackBanana7(PickPlaceBase): + """Trajectory tracking task from grasp states. + + Assumes object is already grasped, only learns trajectory following. + Initial states loaded from pkl file. + """ + + scenario = ScenarioCfg( + objects=[ + # EmbodiedGen Assets - Put Banana in Mug Scene + RigidObjCfg( + name="table", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml", + ), + RigidObjCfg( + name="object", + scale=(1, 1, 1), + physics=PhysicStateType.RIGIDBODY, + usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/usd/banana.usd", + urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/result/banana.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/banana/mjcf/banana.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): 
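+ """Read the grasp-state pkl path from the FastTD3 YAML config, then defer to PickPlaceBase setup."""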
+ # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + 
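+ # Full reset (or first call): seed _last_action from the loaded initial joint positions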
self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :]
+ else:
+ self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :]
+
+ if env_ids is None:
+ env_ids_tensor = torch.arange(self.num_envs, device=self.device)
+ env_ids_list = list(range(self.num_envs))
+ else:
+ env_ids_tensor = (
+ torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids
+ )
+ env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+ self.current_waypoint_idx[env_ids_tensor] = 0
+ self.waypoints_reached[env_ids_tensor] = False
+ self.prev_distance_to_waypoint[env_ids_tensor] = 0.0
+
+ self.object_grasped[env_ids_tensor] = True
+
+ obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids)
+
+ states = self.handler.get_states()
+ if env_ids is None:
+ env_ids_list = list(range(self.num_envs))
+ else:
+ env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids)
+
+ ee_pos, _ = self._get_ee_state(states)
+ target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1)
+ self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1)
+
+ info["grasp_success"] = self.object_grasped
+ info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+ return obs, info
+
+ def step(self, actions):
+ """Step with delta control, keeping gripper closed."""
+ current_joint_pos = self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+ delta_actions = actions * self._action_scale
+ new_actions = current_joint_pos + delta_actions
+ real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+ # Alternative (kept for reference): accumulate deltas on the last commanded action
+ # delta_actions = actions * self._action_scale
+ # new_actions = self._last_action + delta_actions
+
+ gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+ real_actions[:, 0] = gripper_value_closed
+ real_actions[:, 1] = gripper_value_closed
+
+ obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+ self._last_action = real_actions
+
+ updated_states = self.handler.get_states(mode="tensor")
+ object_pos = updated_states.objects["object"].root_state[:, 0:3]
+ gripper_pos, _ = self._get_ee_state(updated_states)
+ gripper_object_dist = torch.norm(gripper_pos - object_pos, dim=-1)
+ is_grasping = gripper_object_dist < self.grasp_check_distance
+
+ self.object_grasped = is_grasping
+ released = ~is_grasping
+
+ if released[0]:
+ log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_object_dist[0].item():.4f}m")
+
+ terminated = terminated | released
+
+ info["grasp_success"] = self.object_grasped
+ info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+ return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_cuttingtool2.py b/roboverse_pack/tasks/pick_place/track_cuttingtool2.py
new file mode 100644
index 000000000..b74f94121
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_cuttingtool2.py
@@ -0,0 +1,445 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states: the object starts
+already grasped, so the policy only needs to learn trajectory following.
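+Episodes terminate early if the object drifts farther than grasp_check_distance from the gripper.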
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+ "tracking_approach": 4.0,
+ "tracking_progress": 150.0,
+ "rotation_tracking": 0.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+
+
+@register_task("pick_place.track_cuttingtool2", "pick_place_track_cuttingtool2")
+class PickPlaceTrackCuttingTool2(PickPlaceBase):
+ """Trajectory tracking task from grasp states.
+
+ Assumes object is already grasped, only learns trajectory following.
+ Initial states loaded from pkl file.
+ """
+
+ scenario = ScenarioCfg(
+ objects=[
+ # EmbodiedGen Assets - cutting tool, basket, and distractor objects
+ RigidObjCfg(
+ name="table",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml",
+ ),
+ RigidObjCfg(
+ name="basket",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml",
+ ),
+ RigidObjCfg(
+ name="object",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml",
+ ),
+ RigidObjCfg(
+ name="spoon",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml",
+ ),
+ RigidObjCfg(
+ name="mug",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml",
+ ),
+ RigidObjCfg(
+ name="book",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml",
+ ),
+ RigidObjCfg(
+ name="remote_control",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd",
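+ # urdf/mjcf paths mirror the usd asset, presumably for non-USD simulator backends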
urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or 
getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = 
self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+ delta_actions = actions * self._action_scale
+ new_actions = current_joint_pos + delta_actions
+ real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+ # Alternative (kept for reference): accumulate deltas on the last commanded action
+ # delta_actions = actions * self._action_scale
+ # new_actions = self._last_action + delta_actions
+
+ gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+ real_actions[:, 0] = gripper_value_closed
+ real_actions[:, 1] = gripper_value_closed
+
+ obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+ self._last_action = real_actions
+
+ updated_states = self.handler.get_states(mode="tensor")
+ object_pos = updated_states.objects["object"].root_state[:, 0:3]
+ gripper_pos, _ = self._get_ee_state(updated_states)
+ gripper_object_dist = torch.norm(gripper_pos - object_pos, dim=-1)
+ is_grasping = gripper_object_dist < self.grasp_check_distance
+
+ self.object_grasped = is_grasping
+ released = ~is_grasping
+
+ if released[0]:
+ log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_object_dist[0].item():.4f}m")
+
+ terminated = terminated | released
+
+ info["grasp_success"] = self.object_grasped
+ info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+ return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_cuttingtool3.py b/roboverse_pack/tasks/pick_place/track_cuttingtool3.py
new file mode 100644
index 000000000..2c2278579
--- /dev/null
+++ b/roboverse_pack/tasks/pick_place/track_cuttingtool3.py
@@ -0,0 +1,445 @@
+"""Stage 3: Track task for trajectory tracking.
+
+Trains trajectory tracking from saved grasp states: the object starts
+already grasped, so the policy only needs to learn trajectory following.
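+Waypoint markers missing from the saved states are filled in from default positions.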
+""" + +from __future__ import annotations + +import os +import pickle +from copy import deepcopy +from pathlib import Path + +import numpy as np +import torch +import yaml +from loguru import logger as log + +from metasim.constants import PhysicStateType +from metasim.scenario.objects import RigidObjCfg +from metasim.scenario.scenario import ScenarioCfg, SimParamCfg +from metasim.task.registry import register_task +from roboverse_pack.tasks.pick_place.base import DEFAULT_CONFIG, PickPlaceBase + + +def load_states_from_pkl(pkl_path: str): + """Load state list from pkl file.""" + if not os.path.exists(pkl_path): + raise FileNotFoundError(f"State file not found: {pkl_path}") + + with open(pkl_path, "rb") as f: + states_list = pickle.load(f) + + log.info(f"Loaded {len(states_list)} states from {pkl_path}") + return states_list + + +def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device, robot_name: str = "franka") -> dict: + """Convert state dict to initial state format.""" + initial_state = { + "objects": {}, + "robots": {}, + } + + if "objects" in state_dict and "robots" in state_dict: + for obj_name, obj_state in state_dict["objects"].items(): + pos = obj_state.get("pos") + rot = obj_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["objects"][obj_name] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in obj_state: + initial_state["objects"][obj_name]["dof_pos"] = obj_state["dof_pos"] + + for robot_name_key, robot_state in state_dict["robots"].items(): + pos = robot_state.get("pos") + rot = robot_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + + initial_state["robots"][robot_name_key] = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in robot_state: + initial_state["robots"][robot_name_key]["dof_pos"] = robot_state["dof_pos"] + else: + # Flat format: convert to nested + for name, entity_state in state_dict.items(): + if name in ["objects", "robots"]: + continue + + pos = entity_state.get("pos") + rot = entity_state.get("rot") + + if isinstance(pos, (list, tuple, np.ndarray)): + pos = torch.tensor(pos, device=device, dtype=torch.float32) + elif isinstance(pos, torch.Tensor): + pos = pos.to(device).float() + elif isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos).to(device).float() + + if isinstance(rot, (list, tuple, np.ndarray)): + rot = torch.tensor(rot, device=device, dtype=torch.float32) + elif isinstance(rot, torch.Tensor): + rot = rot.to(device).float() + elif isinstance(rot, np.ndarray): + rot = torch.from_numpy(rot).to(device).float() + + entity_entry = { + "pos": pos, + "rot": rot, + } + + if "dof_pos" in entity_state: + entity_entry["dof_pos"] = entity_state["dof_pos"] + + if name == robot_name: + initial_state["robots"][name] = entity_entry + else: + initial_state["objects"][name] = entity_entry + + return initial_state + + +DEFAULT_CONFIG_TRACK = 
deepcopy(DEFAULT_CONFIG)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
+ "tracking_approach": 4.0,
+ "tracking_progress": 150.0,
+ "rotation_tracking": 0.0,
+})
+# Remove unused rewards
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
+DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_close", None)
+# Disable randomization for exact state reproduction
+DEFAULT_CONFIG_TRACK["randomization"]["box_pos_range"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["robot_pos_noise"] = 0.0
+DEFAULT_CONFIG_TRACK["randomization"]["joint_noise_range"] = 0.0
+
+
+@register_task("pick_place.track_cuttingtool3", "pick_place_track_cuttingtool3")
+class PickPlaceTrackCuttingTool3(PickPlaceBase):
+ """Trajectory tracking task from grasp states.
+
+ Assumes object is already grasped, only learns trajectory following.
+ Initial states loaded from pkl file.
+ """
+
+ scenario = ScenarioCfg(
+ objects=[
+ # EmbodiedGen Assets - cutting tool, basket, and distractor objects
+ RigidObjCfg(
+ name="table",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/usd/table.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/result/table.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/table/mjcf/table.xml",
+ ),
+ RigidObjCfg(
+ name="basket",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/usd/663158968e3f5900af1f6e7cecef24c7.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/663158968e3f5900af1f6e7cecef24c7.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/basket/1/mjcf/663158968e3f5900af1f6e7cecef24c7.xml",
+ ),
+ RigidObjCfg(
+ name="object",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/usd/c5810e7c2c785fe3940372b205090bad.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/c5810e7c2c785fe3940372b205090bad.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/cutting_tools/1/mjcf/c5810e7c2c785fe3940372b205090bad.xml",
+ ),
+ RigidObjCfg(
+ name="spoon",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/usd/2f1c3077a8d954e58fc0bf75cf35e849.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/2f1c3077a8d954e58fc0bf75cf35e849.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/new_assets/spoon/1/mjcf/2f1c3077a8d954e58fc0bf75cf35e849.xml",
+ ),
+ RigidObjCfg(
+ name="mug",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/usd/mug.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/result/mug.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/mug/mjcf/mug.xml",
+ ),
+ RigidObjCfg(
+ name="book",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/usd/book.usd",
+ urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/result/book.urdf",
+ mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/book/mjcf/book.xml",
+ ),
+ RigidObjCfg(
+ name="remote_control",
+ scale=(1, 1, 1),
+ physics=PhysicStateType.RIGIDBODY,
+ usd_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/usd/remote_control.usd",
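+ # same asset expressed as urdf/mjcf for the other simulator backends (assumption)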
urdf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/result/remote_control.urdf", + mjcf_path="roboverse_data/assets/EmbodiedGenData/demo_assets/remote_control/mjcf/remote_control.xml", + ), + # Trajectory markers + RigidObjCfg( + name="traj_marker_0", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_1", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_2", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_3", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + RigidObjCfg( + name="traj_marker_4", + urdf_path="roboverse_pack/tasks/pick_place/marker/marker.urdf", + mjcf_path="roboverse_pack/tasks/pick_place/marker/marker.xml", + usd_path="roboverse_pack/tasks/pick_place/marker/marker.usd", + scale=0.2, + physics=PhysicStateType.XFORM, + enabled_gravity=False, + collision_enabled=False, + ), + ], + robots=["franka"], + sim_params=SimParamCfg( + dt=0.005, + ), + decimation=4, + ) + max_episode_steps = 200 + + def __init__(self, scenario, device=None): + # Start from current file and walk upward to find RoboVerse root + root = Path(__file__).resolve() + while root != root.parent: + if (root / "roboverse_learn").exists(): + roboverseroot = root + break + root = root.parent + else: + raise RuntimeError("Could not locate RoboVerse root directory") + + # Now construct full path to the YAML + config_path = roboverseroot / "roboverse_learn" / "rl" / "fast_td3" / "configs" / "track_banana.yaml" + + with open(config_path) as f: + cfg = yaml.safe_load(f) + + self.state_file_path = cfg["state_file_path"] + self._loaded_states = None + + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._device = device + + self.object_grasped = None + + super().__init__(scenario, device) + + self.object_grasped = torch.ones(self.num_envs, dtype=torch.bool, device=self.device) + self.reward_functions = [ + self._reward_trajectory_tracking, + self._reward_rotation_tracking, + ] + self.reward_weights = [ + 1.0, + 0.0, # rotation_tracking weight is already applied inside the function + ] + + def _prepare_states(self, states, env_ids): + """Override to disable randomization for track task.""" + return states + + def _get_initial_states(self) -> list[dict] | None: + """Load initial states from pkl file.""" + if self._loaded_states is not None: + return self._loaded_states + + states_list = load_states_from_pkl(self.state_file_path) + + device = getattr(self, "_device", None) or 
getattr(self, "device", None) + if device is None: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + initial_states = [] + robot_name = "franka" + for state_dict in states_list: + initial_state = convert_state_dict_to_initial_state(state_dict, device, robot_name=robot_name) + initial_states.append(initial_state) + + if len(initial_states) < self.num_envs: + k = self.num_envs // len(initial_states) + remainder = self.num_envs % len(initial_states) + initial_states = initial_states * k + initial_states[:remainder] + + initial_states = initial_states[: self.num_envs] + + # Default waypoint positions + default_positions = [ + torch.tensor([0.610000, -0.280000, 0.150000], device=device), + torch.tensor([0.600000, -0.190000, 0.220000], device=device), + torch.tensor([0.560000, -0.110000, 0.360000], device=device), + torch.tensor([0.530000, 0.010000, 0.470000], device=device), + torch.tensor([0.510000, 0.130000, 0.460000], device=device), + ] + default_rotations = [ + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.998750, 0.000000, 0.049979, -0.000000], device=device), + torch.tensor([1.000000, 0.000000, 0.000000, 0.000000], device=device), + torch.tensor([0.984726, 0.000000, 0.174108, -0.000000], device=device), + ] + + for env_idx, initial_state in enumerate(initial_states): + if "objects" not in initial_state: + initial_state["objects"] = {} + + for i in range(self.num_waypoints): + marker_name = f"traj_marker_{i}" + if marker_name not in initial_state["objects"]: + if i < len(default_positions): + initial_state["objects"][marker_name] = { + "pos": default_positions[i].clone(), + "rot": default_rotations[i].clone(), + } + + self._loaded_states = initial_states + log.info(f"Loaded {len(initial_states)} initial states from {self.state_file_path}") + return initial_states + + def reset(self, env_ids=None): + """Reset environment, keeping object grasped.""" + if env_ids is None or self._last_action is None: + self._last_action = self._initial_states.robots[self.robot_name].joint_pos[:, :] + else: + self._last_action[env_ids] = self._initial_states.robots[self.robot_name].joint_pos[env_ids, :] + + if env_ids is None: + env_ids_tensor = torch.arange(self.num_envs, device=self.device) + env_ids_list = list(range(self.num_envs)) + else: + env_ids_tensor = ( + torch.tensor(env_ids, device=self.device) if not isinstance(env_ids, torch.Tensor) else env_ids + ) + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + self.current_waypoint_idx[env_ids_tensor] = 0 + self.waypoints_reached[env_ids_tensor] = False + self.prev_distance_to_waypoint[env_ids_tensor] = 0.0 + + self.object_grasped[env_ids_tensor] = True + + obs, info = super(PickPlaceBase, self).reset(env_ids=env_ids) + + states = self.handler.get_states() + if env_ids is None: + env_ids_list = list(range(self.num_envs)) + else: + env_ids_list = env_ids if isinstance(env_ids, list) else list(env_ids) + + ee_pos, _ = self._get_ee_state(states) + target_pos = self.waypoint_positions[0].unsqueeze(0).expand(len(env_ids_list), -1) + self.prev_distance_to_waypoint[env_ids_list] = torch.norm(ee_pos[env_ids_list] - target_pos, dim=-1) + + info["grasp_success"] = self.object_grasped + info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device) + + return obs, info + + def step(self, actions): + """Step with delta control, keeping gripper closed.""" + current_joint_pos = 
self.handler.get_states(mode="tensor").robots[self.robot_name].joint_pos
+ delta_actions = actions * self._action_scale
+ new_actions = current_joint_pos + delta_actions
+ real_actions = torch.clamp(new_actions, self._action_low, self._action_high)
+
+ # Alternative (kept for reference): accumulate deltas on the last commanded action
+ # delta_actions = actions * self._action_scale
+ # new_actions = self._last_action + delta_actions
+
+ gripper_value_closed = torch.tensor(0.0, device=self.device, dtype=real_actions.dtype)
+ real_actions[:, 0] = gripper_value_closed
+ real_actions[:, 1] = gripper_value_closed
+
+ obs, reward, terminated, time_out, info = super(PickPlaceBase, self).step(real_actions)
+ self._last_action = real_actions
+
+ updated_states = self.handler.get_states(mode="tensor")
+ object_pos = updated_states.objects["object"].root_state[:, 0:3]
+ gripper_pos, _ = self._get_ee_state(updated_states)
+ gripper_object_dist = torch.norm(gripper_pos - object_pos, dim=-1)
+ is_grasping = gripper_object_dist < self.grasp_check_distance
+
+ self.object_grasped = is_grasping
+ released = ~is_grasping
+
+ if released[0]:
+ log.warning(f"[Env 0] Object released during tracking! Distance: {gripper_object_dist[0].item():.4f}m")
+
+ terminated = terminated | released
+
+ info["grasp_success"] = self.object_grasped
+ info["stage"] = torch.full((self.num_envs,), 3, dtype=torch.long, device=self.device)
+
+ return obs, reward, terminated, time_out, info
diff --git a/roboverse_pack/tasks/pick_place/track_screwdriver.py b/roboverse_pack/tasks/pick_place/track_screwdriver.py
index 543aaafb0..3b49b7761 100644
--- a/roboverse_pack/tasks/pick_place/track_screwdriver.py
+++ b/roboverse_pack/tasks/pick_place/track_screwdriver.py
@@ -129,7 +129,7 @@ def convert_state_dict_to_initial_state(state_dict: dict, device: torch.device,
 DEFAULT_CONFIG_TRACK["reward_config"]["scales"].update({
 "tracking_approach": 4.0,
 "tracking_progress": 150.0,
- "rotation_tracking": 2.0,
+ "rotation_tracking": 0.0,
 })
 # Remove unused rewards
 DEFAULT_CONFIG_TRACK["reward_config"]["scales"].pop("gripper_approach", None)
@@ -306,7 +306,7 @@ def __init__(self, scenario, device=None):
 ]
 self.reward_weights = [
 1.0,
- 1.0, # rotation_tracking weight is already applied inside the function
+ 0.0, # rotation_tracking weight is already applied inside the function
 ]
 self.grasp_check_distance = 0.25