From 1b94356966c996a444fccb732c4ce48fcbce8824 Mon Sep 17 00:00:00 2001 From: Octi Date: Mon, 24 Mar 2025 00:45:11 -0700 Subject: [PATCH] introducing manipulation modules: factory extensions and track goals --- .../manager_based/manipulation/__init__.py | 6 + .../factory_extension/__init__.py | 6 + .../factory_extension/assembly_keypoints.py | 137 ++++++ .../factory_extension/config/__init__.py | 9 + .../config/franka/__init__.py | 39 ++ .../config/franka/agents/__init__.py | 4 + .../config/franka/agents/rsl_rl_ppo_cfg.py | 37 ++ .../config/franka/ik_del_env_cfg.py | 54 +++ .../config/franka/joint_pos_env_cfg.py | 59 +++ .../factory_extension/factory_assets_cfg.py | 338 +++++++++++++ .../factory_extension/factory_env_base.py | 322 +++++++++++++ .../factory_extension/gearmesh_env_cfg.py | 94 ++++ .../factory_extension/mdp/__init__.py | 12 + .../mdp/actions/actions_cfg_nist.py | 58 +++ .../mdp/actions/task_space_actions_nist.py | 448 ++++++++++++++++++ .../factory_extension/mdp/events.py | 153 ++++++ .../factory_extension/mdp/observations.py | 69 +++ .../factory_extension/mdp/rewards.py | 94 ++++ .../factory_extension/nutthread_env_cfg.py | 88 ++++ .../factory_extension/peginsert_env_cfg.py | 88 ++++ .../manipulation/track_goal/__init__.py | 6 + .../track_goal/config/__init__.py | 9 + .../track_goal/config/leap/__init__.py | 44 ++ .../track_goal/config/leap/agents/__init__.py | 4 + .../config/leap/agents/rsl_rl_cfg.py | 38 ++ .../track_goal/config/leap/track_goal_leap.py | 39 ++ .../track_goal/config/tycho/__init__.py | 39 ++ .../config/tycho/agents/__init__.py | 4 + .../config/tycho/agents/rsl_rl_cfg.py | 78 +++ .../config/tycho/tycho_track_goal.py | 71 +++ .../track_goal/config/ur5/__init__.py | 29 ++ .../track_goal/config/ur5/agents/__init__.py | 4 + .../config/ur5/agents/rsl_rl_cfg.py | 38 ++ .../config/ur5/track_goal_ur5_env_cfg.py | 64 +++ .../track_goal/config/xarm_leap/__init__.py | 63 +++ .../config/xarm_leap/agents/__init__.py | 4 + .../config/xarm_leap/agents/rsl_rl_cfg.py | 38 ++ .../config/xarm_leap/track_goal_xarm_leap.py | 215 +++++++++ .../track_goal_xarm_leap_deployment.py | 203 ++++++++ .../manipulation/track_goal/mdp/__init__.py | 10 + .../manipulation/track_goal/mdp/command.py | 118 +++++ .../track_goal/mdp/command_cfg.py | 28 ++ .../manipulation/track_goal/mdp/rewards.py | 47 ++ .../manipulation/track_goal/track_goal_env.py | 202 ++++++++ 44 files changed, 3510 insertions(+) create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/assembly_keypoints.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/rsl_rl_ppo_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/ik_del_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/joint_pos_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_assets_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_env_base.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/gearmesh_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/actions_cfg_nist.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/task_space_actions_nist.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/events.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/observations.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/rewards.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/nutthread_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/peginsert_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/rsl_rl_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/track_goal_leap.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/rsl_rl_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/tycho_track_goal.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/rsl_rl_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/track_goal_ur5_env_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/rsl_rl_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap_deployment.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/__init__.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command_cfg.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/rewards.py create mode 100644 source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/track_goal_env.py diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/__init__.py new file mode 100644 index 0000000..1e55ad1 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Manipulation environments for fixed-arm robots.""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/__init__.py new file mode 100644 index 0000000..2863ad6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Factory Task Environments""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/assembly_keypoints.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/assembly_keypoints.py new file mode 100644 index 0000000..dc959f3 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/assembly_keypoints.py @@ -0,0 +1,137 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.utils import configclass + +if TYPE_CHECKING: + from isaaclab.assets import Articulation, RigidObject + + +@configclass +class Offset: + pos: tuple[float, float, float] = (0.0, 0.0, 0.0) + quat: tuple[float, float, float, float] = (1.0, 0.0, 0.0, 0.0) + + @property + def pose(self) -> tuple[float, float, float, float, float, float, float]: + return self.pos + self.quat + + def apply(self, root: RigidObject | Articulation) -> tuple[torch.Tensor, torch.Tensor]: + data = root.data.root_pos_w + pos_w, quat_w = math_utils.combine_frame_transforms( + root.data.root_pos_w, + root.data.root_quat_w, + torch.tensor(self.pos).to(data.device).repeat(data.shape[0], 1), + torch.tensor(self.quat).to(data.device).repeat(data.shape[0], 1), + ) + return pos_w, quat_w + + def combine(self, pos: torch.Tensor, quat: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + num_data = pos.shape[0] + device = pos.device + return math_utils.combine_frame_transforms( + pos, + quat, + torch.tensor(self.pos).to(device).repeat(num_data, 1), + torch.tensor(self.quat).to(device).repeat(num_data, 1), + ) + + +@configclass +class KeyPointsNistBoard: + bolt_m16: Offset = Offset(pos=(0.145, -0.1495, -0.01)) + hole_8mm: Offset = Offset(pos=(-0.0895, 0.0, 0.01)) + gear_base: Offset = Offset(pos=(0.145, 0.021, 0.0092), quat=(0.70711, 0.0, 0.0, -0.70711)) + small_gear: Offset = Offset(pos=(0.145, 0.021, 0.0092), quat=(0.70711, 0.0, 0.0, -0.70711)) + medium_gear: Offset = Offset(pos=(0.145, 0.021, 0.0092), quat=(0.70711, 0.0, 0.0, -0.70711)) + large_gear: Offset = Offset(pos=(0.145, 0.021, 0.0092), quat=(0.70711, 0.0, 0.0, -0.70711)) + + +@configclass +class KeyPointsBoltM16: + bolt_tip_offset: Offset = Offset(pos=(0, 0, 0.035)) + bolt_base_offset: Offset = Offset(pos=(0, 0, 0.01)) + screwed_nut_offset: Offset = Offset(pos=(0, 0, 0.032)) + + +@configclass +class KeyPointsNutM16: + center_axis_bottom: Offset = Offset(pos=(0.0, 0.0, 0.01)) + center_axis_middle: Offset = Offset(pos=(0.0, 0.0, 0.0165)) + center_axis_top: Offset = Offset(pos=(0.0, 0.0, 0.023)) + grasp_point: Offset = Offset(pos=(0.0, 0.0, 0.01), quat=(0.70711, 0.0, 0.0, -0.70711)) + grasp_diameter: float = 0.024 + + +@configclass +class KeyPointsGearBase: + small_gear_tip_offset: Offset = Offset(pos=(0.05075, 0.0, 0.025)) + small_gear_assembled_bottom_offset = Offset(pos=(0.05075, 0.0, 0.005)) + medium_gear_tip_offset: Offset = Offset(pos=(0.02025, 0.0, 0.025)) + medium_gear_assembled_bottom_offset = Offset(pos=(0.02025, 0.0, 0.005)) + large_gear_tip_offset: Offset = Offset(pos=(-0.03025, 0.0, 0.025)) + large_gear_assembled_bottom_offset = Offset(pos=(-0.03025, 0.0, 0.005)) + + +@configclass +class KeyPointsSmallGear: + center_axis_bottom: Offset = Offset(pos=(0.05075, 0.0, 0.005)) + center_axis_top: Offset = Offset(pos=(0.05075, 0.0, 0.03)) + grasp_point: Offset = Offset(pos=(0.05075, 0.0, 0.022)) + grasp_diameter: float = 0.03 + + +@configclass +class KeyPointsMediumGear: + center_axis_bottom: Offset = Offset(pos=(0.02025, 0.0, 0.005)) + center_axis_top: Offset = Offset(pos=(0.02025, 0.0, 0.03)) + grasp_point: Offset = Offset(pos=(0.02025, 0.0, 0.022), quat=(0.70711, 0.0, 0.0, -0.70711)) + grasp_diameter: float = 0.03 + + +@configclass +class KeyPointsLargeGear: + center_axis_bottom: Offset = Offset(pos=(-3.025e-2, 0.0, 0.005)) + center_axis_top: Offset = Offset(pos=(-3.025e-2, 0.0, 0.03)) + grasp_point: Offset = Offset(pos=(-3.025e-2, 0.0, 0.022)) + grasp_diameter: float = 0.03 + + +@configclass +class KeyPointsHole8MM: + hole_tip_offset: Offset = Offset(pos=(0, 0, 0.025)) + inserted_peg_base_offset: Offset = Offset(pos=(0, 0, 0.0)) + + +@configclass +class KeyPointsPeg8MM: + center_axis_bottom: Offset = Offset(pos=(0.0, 0.0, 0.0)) + center_axis_middle: Offset = Offset(pos=(0.0, 0.0, 0.025)) + center_axis_top: Offset = Offset(pos=(0.0, 0.0, 0.05)) + grasp_point: Offset = Offset(pos=(0.0, 0.0, 0.035)) + grasp_diameter: float = 0.007986 + + +@configclass +class KeyPointPandaHand: + object_grasped_point: Offset = Offset(pos=(0.0, 0.0, 0.107), quat=(0.0, 0.0, 1.0, 0.0)) + + +KEYPOINTS_NISTBOARD = KeyPointsNistBoard() +KEYPOINTS_BOLTM16 = KeyPointsBoltM16() +KEYPOINTS_NUTM16 = KeyPointsNutM16() +KEYPOINTS_GEARBASE = KeyPointsGearBase() +KEYPOINTS_SMALLGEAR = KeyPointsSmallGear() +KEYPOINTS_MEDIUMGEAR = KeyPointsMediumGear() +KEYPOINTS_LARGEGEAR = KeyPointsLargeGear() +KEYPOINTS_HOLE8MM = KeyPointsHole8MM() +KEYPOINTS_PEG8MM = KeyPointsPeg8MM() +KEYPOINTS_PANDAHAND = KeyPointPandaHand() diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/__init__.py new file mode 100644 index 0000000..cd7e706 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for the factory environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/__init__.py new file mode 100644 index 0000000..095f51c --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +gym.register( + id="UW-NutThread-Franka-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:FrankaNutThreadEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FactoryPPORunnerCfg", + }, +) + + +gym.register( + id="UW-PegInsert-Franka-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:FrankaPegInsertEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FactoryPPORunnerCfg", + }, +) + +gym.register( + id="UW-GearMesh-Franka-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:FrankaGearMeshEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FactoryPPORunnerCfg", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/rsl_rl_ppo_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..1e60f03 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class FactoryPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 128 + max_iterations = 400 + save_interval = 50 + experiment_name = "factory" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 128, 64], + critic_hidden_dims=[512, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=1e-3, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.02, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/ik_del_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/ik_del_env_cfg.py new file mode 100644 index 0000000..443b268 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/ik_del_env_cfg.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg +from isaaclab.envs.mdp.actions import BinaryJointPositionActionCfg, DifferentialInverseKinematicsActionCfg +from isaaclab.utils import configclass + +from ...factory_tasks import FRANKA_PANDA_CFG +from ...gearmesh_env_cfg import GearMeshEnvCfg +from ...nutthread_env_cfg import NutThreadEnvCfg +from ...peginsert_env_cfg import PegInsertEnvCfg +from . import joint_pos_env_cfg + + +@configclass +class FrankaFactoryIkDelEnvMixIn: + def __post_init__(self: joint_pos_env_cfg.FrankaNutThreadEnvCfg): + super().__post_init__() + self.scene.robot = FRANKA_PANDA_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.robot.actuators["panda_arm1"].stiffness = 800 + self.scene.robot.actuators["panda_arm2"].stiffness = 700 + self.scene.robot.actuators["panda_arm1"].damping = 30 + self.scene.robot.actuators["panda_arm2"].damping = 30 + self.actions.arm_action = DifferentialInverseKinematicsActionCfg( + asset_name="robot", + joint_names=["panda_joint.*"], + body_name="panda_hand", + controller=DifferentialIKControllerCfg(command_type="pose", use_relative_mode=True, ik_method="dls"), + scale=1.0, + body_offset=DifferentialInverseKinematicsActionCfg.OffsetCfg(pos=(0.0, 0.0, 0.107)), + ) + self.actions.gripper_action = BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_finger.*"], + open_command_expr={"panda_finger_.*": 0.04}, + close_command_expr={"panda_finger_.*": 0.0}, + ) + + +@configclass +class FrankaNutThreadIkDeltaEnvCfg(FrankaFactoryIkDelEnvMixIn, NutThreadEnvCfg): + pass + + +@configclass +class FrankaGearMeshIkDeltaEnvCfg(FrankaFactoryIkDelEnvMixIn, GearMeshEnvCfg): + pass + + +@configclass +class FrankaPegInsertIkDeltaEnvCfg(FrankaFactoryIkDelEnvMixIn, PegInsertEnvCfg): + pass diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/joint_pos_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/joint_pos_env_cfg.py new file mode 100644 index 0000000..a9baaac --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/config/franka/joint_pos_env_cfg.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +import uwlab_tasks.manager_based.manipulation.factory_extension.mdp as mdp + +from ...factory_assets_cfg import FRANKA_PANDA_CFG +from ...factory_env_base import FactoryBaseEnvCfg +from ...gearmesh_env_cfg import GearMeshEnvCfg +from ...nutthread_env_cfg import NutThreadEnvCfg +from ...peginsert_env_cfg import PegInsertEnvCfg + + +@configclass +class ActionCfg: + arm_action = mdp.RelativeJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_joint.*"], + scale=0.02, + use_zero_offset=True, + ) + + gripper_action = mdp.BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_finger.*"], + open_command_expr={"panda_finger_.*": 0.0}, + close_command_expr={"panda_finger_.*": 0.0}, + ) + + +@configclass +class FrankaFactoryEnvMixIn: + actions: ActionCfg = ActionCfg() + + def __post_init__(self: FactoryBaseEnvCfg): + super().__post_init__() + self.scene.robot = FRANKA_PANDA_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.robot.actuators["panda_arm1"].stiffness = 80.0 + self.scene.robot.actuators["panda_arm1"].damping = 4.0 + self.scene.robot.actuators["panda_arm2"].stiffness = 80.0 + self.scene.robot.actuators["panda_arm2"].damping = 4.0 + + +@configclass +class FrankaNutThreadEnvCfg(FrankaFactoryEnvMixIn, NutThreadEnvCfg): + pass + + +@configclass +class FrankaGearMeshEnvCfg(FrankaFactoryEnvMixIn, GearMeshEnvCfg): + pass + + +@configclass +class FrankaPegInsertEnvCfg(FrankaFactoryEnvMixIn, PegInsertEnvCfg): + pass diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_assets_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_assets_cfg.py new file mode 100644 index 0000000..f2389b6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_assets_cfg.py @@ -0,0 +1,338 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from uwlab_assets import UWLAB_CLOUD_ASSETS_DIR + +import isaaclab.sim as sim_utils +from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg + +# This is where we will get the Robot that we want to use +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR + +ASSET_DIR = f"{ISAACLAB_NUCLEUS_DIR}/Factory" + + +GROUND_CFG = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -0.868)), +) + +DOMELIGHT_CFG = AssetBaseCfg( + prim_path="/World/DomeLight", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=1500.0), +) + + +FRANKA_PANDA_CFG = ArticulationCfg( + prim_path="/World/envs/env_.*/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/franka_mimic.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "panda_joint1": 0.00871, + "panda_joint2": -0.10368, + "panda_joint3": -0.00794, + "panda_joint4": -1.49139, + "panda_joint5": -0.00083, + "panda_joint6": 1.38774, + "panda_joint7": 0.0, + "panda_finger_joint2": 0.04, + }, + pos=(0.0, 0.0, 0.0), + rot=(1.0, 0.0, 0.0, 0.0), + ), + # Stiffness and dampness of the panda arm parts + # will be set + actuators={ + "panda_arm1": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-4]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit=87, + # velocity_limit=124.6, + ), + "panda_arm2": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[5-7]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit=12, + # velocity_limit=149.5, + ), + # Stiffness and dampness should be zero in order for these to not move + "panda_hand": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint[1-2]"], + effort_limit=40.0, + # velocity_limit=0.04, + stiffness=7500.0, + damping=173.0, + friction=0.1, + armature=0.0, + ), + }, +) + +# Table +TABLE_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Table", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{UWLAB_CLOUD_ASSETS_DIR}/Props/Mounts/UWPatVention/pat_vention.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(0.4, 0.0, -0.868), rot=(0.70711, 0.0, 0.0, -0.70711)), +) + +# NIST Board +NISTBOARD_CFG = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/NistBoard", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{UWLAB_CLOUD_ASSETS_DIR}/Props/NIST/Taskboard/nistboard.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg(kinematic_enabled=True), + ), + init_state=RigidObjectCfg.InitialStateCfg(pos=(0.6, 0.0, 0.00)), +) + +## +# Assembly Tools +## + + +BOLT_M16_CFG = ArticulationCfg( + prim_path="/World/envs/env_.*/BoltAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_bolt_m16.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.05), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.55, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + +NUT_M16_CFG = ArticulationCfg( + prim_path="/World/envs/env_.*/NutAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_nut_m16.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.03), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.4, 0.3, 0.0), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + + +HOLE_8MM_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/HoleAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_hole_8mm.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.05), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.6, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + + +PEG_8MM_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/PegAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_peg_8mm.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.4, 0.35, 0.0), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + +SMALL_GEAR_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/SmallGearAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_gear_small.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + + +LARGE_GEAR_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/LargeGearAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_gear_large.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.45, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + + +GEAR_BASE_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/GearBaseAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_gear_base.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.05), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.65, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) + + +MEDIUM_GEAR_CFG: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/MediumGearAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/factory_gear_medium.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.012), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.4, 0.40, 0.0), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_env_base.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_env_base.py new file mode 100644 index 0000000..c1fbb81 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/factory_env_base.py @@ -0,0 +1,322 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING + +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import ManagerBasedRLEnvCfg, ViewerCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass + +import uwlab_tasks.manager_based.manipulation.factory_extension.mdp as mdp + +from . import factory_assets_cfg as assets + +""" +Base scene definition for Factory Tasks +""" + + +@configclass +class FactorySceneCfg(InteractiveSceneCfg): + """Configuration for a factory task scene.""" + + # Ground plane + ground = assets.GROUND_CFG + + # Table + table = assets.TABLE_CFG + + # NIST Board + nistboard = assets.NISTBOARD_CFG + + # "FIXED ASSETS" + bolt_m16: ArticulationCfg = assets.BOLT_M16_CFG + gear_base: ArticulationCfg = assets.GEAR_BASE_CFG + hole_8mm: ArticulationCfg = assets.HOLE_8MM_CFG + + # "Moving Gears" + small_gear: ArticulationCfg = assets.SMALL_GEAR_CFG + large_gear: ArticulationCfg = assets.LARGE_GEAR_CFG + + # "HELD ASSETS" + nut_m16: ArticulationCfg = assets.NUT_M16_CFG + medium_gear: ArticulationCfg = assets.MEDIUM_GEAR_CFG + peg_8mm: ArticulationCfg = assets.PEG_8MM_CFG + + # Robot Override + robot: ArticulationCfg = MISSING # type: ignore + + # Lights + dome_light = assets.DOMELIGHT_CFG + + +@configclass +class FactoryObservationsCfg: + """Observation specifications for Factory.""" + + @configclass + class PolicyCfg(ObsGroup): + end_effector_vel_lin_ang_b = ObsTerm( + func=mdp.asset_link_velocity_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("robot", body_names="end_effector"), + "root_asset_cfg": SceneEntityCfg("robot"), + }, + ) + + end_effector_pose = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("robot", body_names="end_effector"), + "root_asset_cfg": SceneEntityCfg("robot"), + }, + ) + + fixed_asset_in_end_effector_frame: ObsTerm = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("fixed_asset"), + "root_asset_cfg": SceneEntityCfg("robot", body_names="end_effector"), + }, + ) + + prev_action = ObsTerm(func=mdp.last_action) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + @configclass + class CriticCfg(ObsGroup): + prev_actions = ObsTerm(func=mdp.last_action) + + joint_pos = ObsTerm(func=mdp.joint_pos) + + end_effector_pose = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("robot", body_names="end_effector"), + "root_asset_cfg": SceneEntityCfg("robot"), + }, + ) + + end_effector_vel_lin_ang_b = ObsTerm( + func=mdp.asset_link_velocity_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("robot", body_names="end_effector"), + "root_asset_cfg": SceneEntityCfg("robot"), + }, + ) + + held_asset_pose = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={"target_asset_cfg": SceneEntityCfg("held_asset"), "root_asset_cfg": SceneEntityCfg("robot")}, + ) + + fixed_asset_pose = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={"target_asset_cfg": SceneEntityCfg("fixed_asset"), "root_asset_cfg": SceneEntityCfg("robot")}, + ) + + held_asset_in_fixed_asset_frame: ObsTerm = ObsTerm( + func=mdp.target_asset_pose_in_root_asset_frame, + params={ + "target_asset_cfg": SceneEntityCfg("held_asset"), + "root_asset_cfg": SceneEntityCfg("fixed_asset"), + }, + ) + + def __post_init__(self) -> None: + self.enable_corruption = False + self.concatenate_terms = True + + policy: PolicyCfg = PolicyCfg() + critic: CriticCfg = CriticCfg() + + +@configclass +class FactoryEventCfg: + """Events specifications for Factory""" + + # mode: startup + held_asset_material = EventTerm( + func=mdp.randomize_rigid_body_material, # type: ignore + mode="startup", + params={ + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + "asset_cfg": SceneEntityCfg("held_asset"), + }, + ) + + fixed_asset_material = EventTerm( + func=mdp.randomize_rigid_body_material, # type: ignore + mode="startup", + params={ + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + "asset_cfg": SceneEntityCfg("fixed_asset"), + }, + ) + + robot_material = EventTerm( + func=mdp.randomize_rigid_body_material, # type: ignore + mode="startup", + params={ + "static_friction_range": (0.75, 0.75), + "dynamic_friction_range": (0.75, 0.75), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + # mode: reset + reset_env = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_board = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.01, 0.01), "y": (-0.01, 0.01), "yaw": (-3.14, 3.14)}, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("nistboard"), + }, + ) + + reset_fixed_asset = EventTerm( + func=mdp.reset_fixed_assets, + mode="reset", + params={ + "asset_list": ["fixed_asset"], + }, + ) + + reset_end_effector_around_fixed_asset = EventTerm( + func=mdp.reset_end_effector_round_fixed_asset, # type: ignore + mode="reset", + params={ + "fixed_asset_cfg": MISSING, + "fixed_asset_offset": MISSING, + "pose_range_b": MISSING, + "robot_ik_cfg": SceneEntityCfg("robot"), + }, + ) + + reset_held_asset_in_hand = EventTerm( + func=mdp.reset_held_asset, + mode="reset", + params={ + "holding_body_cfg": SceneEntityCfg("robot", body_names="end_effector"), + "held_asset_cfg": SceneEntityCfg("held_asset"), + "held_asset_graspable_offset": MISSING, + "held_asset_inhand_range": {}, + }, + ) + + grasp_held_asset = EventTerm( + func=mdp.grasp_held_asset, + mode="reset", + params={"robot_cfg": SceneEntityCfg("robot", body_names="end_effector"), "held_asset_diameter": MISSING}, + ) + + +@configclass +class FactoryRewardsCfg: + """Reward terms for Factory""" + + # penalties + action_l2 = RewTerm(func=mdp.action_l2, weight=-0.0) + + action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.0) + + # progress rewards + progress_context = RewTerm( + func=mdp.ProgressContext, # type: ignore + weight=0.01, + params={ + "success_threshold": 0.001, + "held_asset_cfg": SceneEntityCfg("held_asset"), + "fixed_asset_cfg": SceneEntityCfg("fixed_asset"), + "held_asset_offset": MISSING, + "fixed_asset_offset": MISSING, + }, + ) + + # orientation_alignment = RewTerm(func=mdp.orientation_reward, weight=1.0, params={"std": 0.02}) + + concentric_alignment_coarse = RewTerm(func=mdp.concentric_reward, weight=5.0, params={"std": 0.02}) + + concentric_alignment_fine = RewTerm(func=mdp.concentric_reward, weight=10.0, params={"std": 0.005}) + + progress_reward_coarse = RewTerm(func=mdp.progress_reward, weight=5.0, params={"std": 0.02}) + + progress_reward_fine = RewTerm(func=mdp.progress_reward, weight=10.0, params={"std": 0.005}) + + success_reward = RewTerm(func=mdp.success_reward, weight=20.0) + + +@configclass +class FactoryTerminationsCfg: + """Termination terms for Factory.""" + + # (1) Time out + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +## +# Environment configuration +## +@configclass +class FactoryBaseEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the base Factory environment.""" + + scene: FactorySceneCfg = FactorySceneCfg(num_envs=2, env_spacing=2.0) + observations: FactoryObservationsCfg = FactoryObservationsCfg() + events: FactoryEventCfg = FactoryEventCfg() + terminations: FactoryTerminationsCfg = FactoryTerminationsCfg() + rewards: FactoryRewardsCfg = FactoryRewardsCfg() + viewer: ViewerCfg = ViewerCfg( + eye=(0.0, 0.5, 0.1), origin_type="asset_body", asset_name="robot", body_name="panda_fingertip_centered" + ) + actions = MISSING + + # Post initialization + def __post_init__(self) -> None: + """Post initialization.""" + # general settings + self.decimation = 8 # 15hz + self.episode_length_s = 10 + # simulation settings + self.sim.dt = 1 / 120 + self.sim.render_interval = self.decimation + + self.sim.physx.solver_type = 1 + self.sim.physx.max_position_iteration_count = 192 # Important to avoid interpenetration. + self.sim.physx.max_velocity_iteration_count = 1 + self.sim.physx.bounce_threshold_velocity = 0.2 + self.sim.physx.friction_offset_threshold = 0.01 + self.sim.physx.friction_correlation_distance = 0.00625 + self.sim.physx.gpu_max_rigid_contact_count = 2**23 + self.sim.physx.gpu_max_rigid_patch_count = 2**23 + self.sim.physx.gpu_collision_stack_size = 2**31 + self.sim.physx.gpu_max_num_partitions = 1 + + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 + + self.sim.render.enable_ambient_occlusion = True + self.sim.render.enable_dlssg = True diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/gearmesh_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/gearmesh_env_cfg.py new file mode 100644 index 0000000..ad59458 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/gearmesh_env_cfg.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +from .assembly_keypoints import KEYPOINTS_GEARBASE, KEYPOINTS_MEDIUMGEAR +from .factory_env_base import FactoryBaseEnvCfg, FactoryEventCfg, FactoryObservationsCfg, FactoryRewardsCfg + + +@configclass +class GearMeshObservationsCfg(FactoryObservationsCfg): + def __post_init__(self): + # policy + self.policy.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_cfg"] = SceneEntityCfg("gear_base") + self.policy.fixed_asset_in_end_effector_frame.params["root_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_offset"] = ( + KEYPOINTS_GEARBASE.medium_gear_tip_offset + ) + + # critic + self.critic.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.held_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("medium_gear") + self.critic.fixed_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("gear_base") + self.critic.held_asset_in_fixed_asset_frame.params["target_asset_cfg"] = SceneEntityCfg("medium_gear") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_cfg"] = SceneEntityCfg("gear_base") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_offset"] = ( + KEYPOINTS_GEARBASE.medium_gear_tip_offset + ) + + +@configclass +class GearMeshEventCfg(FactoryEventCfg): + def __post_init__(self): + # For asset_material + self.held_asset_material.params["asset_cfg"] = SceneEntityCfg("medium_gear") + self.fixed_asset_material.params["asset_cfg"] = SceneEntityCfg("gear_base") + + # For reset_fixed_asset + self.reset_fixed_asset.params["asset_list"] = ["gear_base", "large_gear", "small_gear"] + + # For reset_hand + self.reset_end_effector_around_fixed_asset.params["fixed_asset_cfg"] = SceneEntityCfg("gear_base") + self.reset_end_effector_around_fixed_asset.params["fixed_asset_offset"] = ( + KEYPOINTS_GEARBASE.medium_gear_tip_offset + ) + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].joint_names = ["panda_joint.*"] + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].body_names = "panda_fingertip_centered" + self.reset_end_effector_around_fixed_asset.params["pose_range_b"] = { + "x": (-0.02, 0.02), + "y": (-0.02, 0.02), + "z": (0.035, 0.045), + "roll": (3.141, 3.141), + "yaw": (-0.785, 0.785), + } + + # For reset_held_asset + self.reset_held_asset_in_hand.params["holding_body_cfg"].body_names = "panda_fingertip_centered" + self.reset_held_asset_in_hand.params["held_asset_cfg"] = SceneEntityCfg("medium_gear") + self.reset_held_asset_in_hand.params["held_asset_graspable_offset"] = KEYPOINTS_MEDIUMGEAR.grasp_point + + # For grasp_held_assset + self.grasp_held_asset.params["robot_cfg"].body_names = "panda_fingertip_centered" + self.grasp_held_asset.params["robot_cfg"].joint_names = "panda_finger_joint[1-2]" + self.grasp_held_asset.params["held_asset_diameter"] = KEYPOINTS_MEDIUMGEAR.grasp_diameter + + +@configclass +class GearMeshRewardsCfg(FactoryRewardsCfg): + def __post_init__(self): + # For progress_context + self.progress_context.params["fixed_asset_cfg"] = SceneEntityCfg("gear_base") + self.progress_context.params["held_asset_cfg"] = SceneEntityCfg("medium_gear") + self.progress_context.params["held_asset_offset"] = KEYPOINTS_MEDIUMGEAR.center_axis_bottom + self.progress_context.params["fixed_asset_offset"] = KEYPOINTS_GEARBASE.medium_gear_assembled_bottom_offset + + +@configclass +class GearMeshEnvCfg(FactoryBaseEnvCfg): + """Configuration for the GearMesh environment.""" + + observations: GearMeshObservationsCfg = GearMeshObservationsCfg() + events: GearMeshEventCfg = GearMeshEventCfg() + rewards: GearMeshRewardsCfg = GearMeshRewardsCfg() + + def __post_init__(self): + super().__post_init__() + for asset in ["bolt_m16", "hole_8mm", "nut_m16", "peg_8mm"]: + delattr(self.scene, asset) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/__init__.py new file mode 100644 index 0000000..ef08ed5 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the Factory environments.""" + +from isaaclab.envs.mdp import * # noqa: F401, F403 + +from .events import * +from .observations import * +from .rewards import * # noqa: F401, F403 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/actions_cfg_nist.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/actions_cfg_nist.py new file mode 100644 index 0000000..03f6f98 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/actions_cfg_nist.py @@ -0,0 +1,58 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING + +from isaaclab.controllers import DifferentialIKControllerCfg +from isaaclab.managers.action_manager import ActionTerm, ActionTermCfg +from isaaclab.utils import configclass + +from uwlab_tasks.manager_based.manipulation.factory_extension.factory_assets_cfg import FixedAssetCfg + +## +# Joint actions. +## +from uwlab_tasks.manager_based.manipulation.factory_extension.factory_tasks import FactoryTask + +from . import task_space_actions_nist + + +@configclass +class TaskSpaceBoundedDifferentialInverseKinematicsActionNISTCfg(ActionTermCfg): + """Configuration for inverse differential kinematics action term. + + See :class:`DifferentialInverseKinematicsAction` for more details. + """ + + @configclass + class OffsetCfg: + """The offset pose from parent frame to child frame. + + On many robots, end-effector frames are fictitious frames that do not have a corresponding + rigid body. In such cases, it is easier to define this transform w.r.t. their parent rigid body. + For instance, for the Franka Emika arm, the end-effector is defined at an offset to the the + "panda_hand" frame. + """ + + pos: tuple[float, float, float] = (0.0, 0.0, 0.0) + """Translation w.r.t. the parent frame. Defaults to (0.0, 0.0, 0.0).""" + rot: tuple[float, float, float, float] = (1.0, 0.0, 0.0, 0.0) + """Quaternion rotation ``(w, x, y, z)`` w.r.t. the parent frame. Defaults to (1.0, 0.0, 0.0, 0.0).""" + + class_type: type[ActionTerm] = task_space_actions_nist.TaskSpaceBoundedDifferentialInverseKinematicsAction + + joint_names: list[str] = MISSING + """List of joint names or regex expressions that the action will be mapped to.""" + body_name: str = MISSING + """Name of the body or frame for which IK is performed.""" + body_offset: OffsetCfg | None = None + """Offset of target frame w.r.t. to the body frame. Defaults to None, in which case no offset is applied.""" + scale: float | tuple[float, ...] = 1.0 + """Scale factor for the action. Defaults to 1.0.""" + controller: DifferentialIKControllerCfg = MISSING + """The configuration for the differential IK controller.""" + task_cfg: FactoryTask = MISSING + + fixed_asset_cfg: FixedAssetCfg = MISSING diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/task_space_actions_nist.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/task_space_actions_nist.py new file mode 100644 index 0000000..cd8da02 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/actions/task_space_actions_nist.py @@ -0,0 +1,448 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import numpy as np +import torch +from collections.abc import Sequence +from typing import TYPE_CHECKING + +import omni.log + +import isaaclab.utils.math as math_utils +import isaaclab.utils.string as string_utils +from isaaclab.assets.articulation import Articulation +from isaaclab.controllers.differential_ik import DifferentialIKController +from isaaclab.managers.action_manager import ActionTerm + +from uwlab_tasks.manager_based.manipulation.factory_extension.factory_tasks import CtrlCfg, ObsRandCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + from .actions_cfg_nist import TaskSpaceBoundedDifferentialInverseKinematicsActionNISTCfg + + +class TaskSpaceBoundedDifferentialInverseKinematicsAction(ActionTerm): + r"""Inverse Kinematics action term. + + This action term performs pre-processing of the raw actions using scaling transformation. + + .. math:: + \text{action} = \text{scaling} \times \text{input action} + \text{joint position} = J^{-} \times \text{action} + + where :math:`\text{scaling}` is the scaling applied to the input action, and :math:`\text{input action}` + is the input action from the user, :math:`J` is the Jacobian over the articulation's actuated joints, + and \text{joint position} is the desired joint position command for the articulation's joints. + """ + + cfg: TaskSpaceBoundedDifferentialInverseKinematicsActionNISTCfg + """The configuration of the action term.""" + _asset: Articulation + """The articulation asset on which the action term is applied.""" + _scale: torch.Tensor + """The scaling factor applied to the input action. Shape is (1, action_dim).""" + _clip: torch.Tensor + """The clip applied to the input action.""" + + def __init__(self, cfg: TaskSpaceBoundedDifferentialInverseKinematicsActionNISTCfg, env: ManagerBasedEnv): + # initialize the action term + super().__init__(cfg, env) + + self.task_name = "" + + # resolve the joints over which the action term is applied + self._joint_ids, self._joint_names = self._asset.find_joints(self.cfg.joint_names) + self._num_joints = len(self._joint_ids) + # parse the body index + body_ids, body_names = self._asset.find_bodies(self.cfg.body_name) + if len(body_ids) != 1: + raise ValueError( + f"Expected one match for the body name: {self.cfg.body_name}. Found {len(body_ids)}: {body_names}." + ) + # save only the first body index + self._body_idx = body_ids[0] + self._body_name = body_names[0] + # check if articulation is fixed-base + # if fixed-base then the jacobian for the base is not computed + # this means that number of bodies is one less than the articulation's number of bodies + if self._asset.is_fixed_base: + self._jacobi_body_idx = self._body_idx - 1 + self._jacobi_joint_ids = self._joint_ids + else: + self._jacobi_body_idx = self._body_idx + self._jacobi_joint_ids = [i + 6 for i in self._joint_ids] + + # log info for debugging + omni.log.info( + f"Resolved joint names for the action term {self.__class__.__name__}:" + f" {self._joint_names} [{self._joint_ids}]" + ) + omni.log.info( + f"Resolved body name for the action term {self.__class__.__name__}: {self._body_name} [{self._body_idx}]" + ) + # Avoid indexing across all joints for efficiency + if self._num_joints == self._asset.num_joints: + self._joint_ids = slice(None) + + # create the differential IK controller + self._ik_controller = DifferentialIKController( + cfg=self.cfg.controller, num_envs=self.num_envs, device=self.device + ) + + # create tensors for raw and processed actions + self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device) + self._processed_actions = torch.zeros_like(self.raw_actions) + + # save the scale as tensors + self._scale = torch.zeros((self.num_envs, self.action_dim), device=self.device) + self._scale[:] = torch.tensor(self.cfg.scale, device=self.device) + + # convert the fixed offsets to torch tensors of batched shape + if self.cfg.body_offset is not None: + self._offset_pos = torch.tensor(self.cfg.body_offset.pos, device=self.device).repeat(self.num_envs, 1) + self._offset_rot = torch.tensor(self.cfg.body_offset.rot, device=self.device).repeat(self.num_envs, 1) + else: + self._offset_pos, self._offset_rot = None, None + + self.ee_stiffness = torch.tensor(CtrlCfg.default_task_prop_gains, device=self.device).repeat((self.num_envs, 1)) + self.ee_damping = 2 * torch.sqrt(self.ee_stiffness) + + # parse clip + if self.cfg.clip is not None: + if isinstance(cfg.clip, dict): + self._clip = torch.tensor([[-float("inf"), float("inf")]], device=self.device).repeat( + self.num_envs, self.action_dim, 1 + ) + index_list, _, value_list = string_utils.resolve_matching_names_values(self.cfg.clip, self._joint_names) + self._clip[:, index_list] = torch.tensor(value_list, device=self.device) + else: + raise ValueError(f"Unsupported clip type: {type(cfg.clip)}. Supported types are dict.") + + """ + Properties. + """ + + @property + def action_dim(self) -> int: + return self._ik_controller.action_dim + + @property + def raw_actions(self) -> torch.Tensor: + return self._raw_actions + + @property + def processed_actions(self) -> torch.Tensor: + return self._processed_actions + + @property + def jacobian_w(self) -> torch.Tensor: + return self._asset.root_physx_view.get_jacobians()[:, self._jacobi_body_idx, :, self._jacobi_joint_ids] + + @property + def jacobian_b(self) -> torch.Tensor: + jacobian = self.jacobian_w + base_rot = self._asset.data.root_link_quat_w + base_rot_matrix = math_utils.matrix_from_quat(math_utils.quat_inv(base_rot)) + jacobian[:, :3, :] = torch.bmm(base_rot_matrix, jacobian[:, :3, :]) + jacobian[:, 3:, :] = torch.bmm(base_rot_matrix, jacobian[:, 3:, :]) + return jacobian + + """ + Operations. + """ + # WE CAN MAKE ANOTHER CLASS, THE MAIN DIFFERENCE WILL BE HERE, WHAT WE CAN DO IN ADDITION + # TO HAVING CLIP IS THE FOLLOWING + """ + + """ + + def process_actions(self, actions: torch.Tensor): + """ + Action preprocess + + 1. threshold for x,y,z + 2. threshold for r,p,y + 3. modify the yaw properly + 4. larger emphasis on past actions then current actions <-- when did this happen, looking over the code I do not see this + """ + + self._processed_actions = ( + CtrlCfg.ema_factor * actions.to(self.device) + (1 - CtrlCfg.ema_factor) * self._raw_actions + ) + self._raw_actions[:] = actions + + self._processed_actions[:, 0:3] *= torch.tensor(CtrlCfg.pos_action_threshold, device=self.device).repeat( + (self.num_envs, 1) + ) + if self.cfg.task_cfg.unidirectional_rot: + self._processed_actions[:, 5] = -(self._processed_actions[:, 5] + 1) * 0.5 + self._processed_actions[:, 3:] *= torch.tensor(CtrlCfg.rot_action_threshold, device=self.device).repeat( + (self.num_envs, 1) + ) + + # This is not even needed since the scale is 1.0 + self._processed_actions[:] = self._processed_actions * self._scale + + """ + This is clip respect to the center of mid gripper, this will set the max and min value of the delta + gripper pos. + + Restrictions: + - have the actions be within 5cm of the tip of the fixed asset + - have the roll and pitch be fixed. + - roll = 3.141519 - pi + - pitch = 0.0 + """ + # set some useful reference to environment assets states + + fixed_asset: Articulation = self._env.scene["fixed_asset"] + fixed_pos = fixed_asset.data.root_link_pos_w - self._env.scene.env_origins + fixed_quat = fixed_asset.data.root_link_quat_w + fixed_tip_pos_local = torch.zeros_like(fixed_pos) + + # here we want to be able to get the + fixed_tip_pos_local[:, 2] += self.cfg.fixed_asset_cfg.height + fixed_tip_pos_local[:, 2] += self.cfg.fixed_asset_cfg.base_height + fixed_tip_pos, _ = math_utils.combine_frame_transforms(fixed_pos, fixed_quat, fixed_tip_pos_local) + + fixed_asset_pos_noise = torch.randn((self.num_envs, 3), dtype=torch.float32, device=self.device) + fixed_asset_pos_rand = torch.tensor(ObsRandCfg.fixed_asset_pos, dtype=torch.float32, device=self.device) + fixed_asset_pos_noise = fixed_asset_pos_noise @ torch.diag(fixed_asset_pos_rand) + + # clip begins + fixed_pos_action_frame = fixed_tip_pos + fixed_asset_pos_noise + + pos_actions = self._processed_actions[:, 0:3] + rot_actions = self._processed_actions[:, 3:6] + + self.fingertip_midpoint_quat = self._asset.data.body_link_quat_w[:, self._body_idx] + self.fingertip_midpoint_pos = self._asset.data.body_link_pos_w[:, self._body_idx] - self._env.scene.env_origins + + self.ctrl_target_fingertip_midpoint_pos = self.fingertip_midpoint_pos + pos_actions + + # Make sure that the actions are within 5cm of the tip of the fixed asset + delta_pos = self.ctrl_target_fingertip_midpoint_pos - fixed_pos_action_frame + pos_error_clipped = torch.clip(delta_pos, -CtrlCfg.pos_action_bounds[0], CtrlCfg.pos_action_bounds[1]) + self.ctrl_target_fingertip_midpoint_pos = fixed_pos_action_frame + pos_error_clipped + + # Convert to quat and set rot target + angle = torch.norm(rot_actions, p=2, dim=-1) + axis = rot_actions / angle.unsqueeze(-1) + + rot_actions_quat = math_utils.quat_from_angle_axis(angle, axis) + rot_actions_quat = torch.where( + angle.unsqueeze(-1).repeat(1, 4) > 1e-6, + rot_actions_quat, + torch.tensor([1.0, 0.0, 0.0, 0.0], device=self.device).repeat(self.num_envs, 1), + ) + self.ctrl_target_fingertip_midpoint_quat = math_utils.quat_mul(rot_actions_quat, self.fingertip_midpoint_quat) + + target_euler_xyz = torch.stack(math_utils.euler_xyz_from_quat(self.ctrl_target_fingertip_midpoint_quat), dim=1) + + """ + Restrictions for the roll and pitch + """ + target_euler_xyz[:, 0] = 3.14159 # Restrict actions to be upright. + target_euler_xyz[:, 1] = 0.0 + + self.ctrl_target_fingertip_midpoint_quat = math_utils.quat_from_euler_xyz( + roll=target_euler_xyz[:, 0], pitch=target_euler_xyz[:, 1], yaw=target_euler_xyz[:, 2] + ) + + # THIS WE WILL BE DOING WITH THE INVERSE KINEMATICS + self.ctrl_target_gripper_dof_pos = 0.0 + + def apply_actions(self): + joint_torque = torch.zeros((self.num_envs, self._num_joints), device=self.device) + + # to get the pos and quat for the end effector + ee_pos_curr, ee_quat_curr = self._compute_frame_pose() + + # """ + # Use axis angle in order to get (x,y,z) and (rx,ry,rz) + + # This is so that we can get pose + # """ + pos_error, axis_angle_error = math_utils.compute_pose_error( + ee_pos_curr, + ee_quat_curr, + self.ctrl_target_fingertip_midpoint_pos, + self.ctrl_target_fingertip_midpoint_quat, + rot_error_type="axis_angle", + ) + # set command into controller + + delta_fingertip_pose = torch.cat((pos_error, axis_angle_error), dim=1) + + task_wrench = torch.zeros((self.num_envs, 6), device=self.device) + + def _apply_task_space_gains(): + task_wrench = torch.zeros_like(delta_fingertip_pose) + + # Apply gains to lin error components + lin_error = delta_fingertip_pose[:, 0:3] + + default_gains = torch.tensor(CtrlCfg.default_task_prop_gains, device=self.device).repeat((self.num_envs, 1)) + + task_deriv_gains = 2 * torch.sqrt(default_gains) + + task_wrench[:, 0:3] = default_gains[:, 0:3] * lin_error + task_deriv_gains[:, 0:3] * ( + 0.0 - self._asset.data.body_lin_vel_w[..., self._body_idx, :] + ) + + # Apply gains to rot error components + rot_error = delta_fingertip_pose[:, 3:6] + task_wrench[:, 3:6] = default_gains[:, 3:6] * rot_error + task_deriv_gains[:, 3:6] * ( + 0.0 - self._asset.data.body_ang_vel_w[..., self._body_idx, :] + ) + return task_wrench + + task_wrench_motion = _apply_task_space_gains() + + task_wrench += task_wrench_motion + + # Set tau = J^T * tau, i.e., map tau into joint space as desired + jacobian = self._compute_frame_jacobian() + jacobian_T = torch.transpose(jacobian, dim0=1, dim1=2) + joint_torque[:, 0:7] = (jacobian_T @ task_wrench.unsqueeze(-1)).squeeze(-1) + + # # adapted from https://gitlab-master.nvidia.com/carbon-gym/carbgym/-/blob/b4bbc66f4e31b1a1bee61dbaafc0766bbfbf0f58/python/examples/franka_cube_ik_osc.py#L70-78 + # # roboticsproceedings.org/rss07/p31.pdf + + arm_mass_matrix = self._asset.root_physx_view.get_generalized_mass_matrices()[:, 0:7, 0:7] + # useful tensors + arm_mass_matrix_inv = torch.inverse(arm_mass_matrix) + arm_mass_matrix_task = torch.inverse( + jacobian @ torch.inverse(arm_mass_matrix) @ jacobian_T + ) # ETH eq. 3.86; geometric Jacobian is assumed + j_eef_inv = arm_mass_matrix_task @ jacobian @ arm_mass_matrix_inv + default_dof_pos_tensor = torch.tensor(CtrlCfg.default_dof_pos_tensor, device=self.device).repeat( + (self.num_envs, 1) + ) + # nullspace computation + distance_to_default_dof_pos = default_dof_pos_tensor - self._asset.data.joint_pos[:, :7] + distance_to_default_dof_pos = (distance_to_default_dof_pos + torch.pi) % ( + 2 * torch.pi + ) - torch.pi # normalize to [-pi, pi] + + # null space control + u_null = CtrlCfg.kd_null * -self._asset.data.joint_vel[:, :7] + CtrlCfg.kp_null * distance_to_default_dof_pos + u_null = arm_mass_matrix @ u_null.unsqueeze(-1) + torque_null = ( + torch.eye(7, device=self.device).unsqueeze(0) - torch.transpose(jacobian, 1, 2) @ j_eef_inv + ) @ u_null + joint_torque[:, 0:7] += torque_null.squeeze(-1) + + joint_torque = torch.clamp(joint_torque, min=-100.0, max=100.0) + + self._asset.set_joint_effort_target(joint_torque, joint_ids=self._joint_ids) + + self._asset.set_joint_position_target(torch.zeros((self.num_envs, 2), device=self.device), joint_ids=[7, 8]) + + def reset(self, env_ids: Sequence[int] | None = None) -> None: + """ + This is the reset portion for actions + + Code references from factory_env.py's randomize_initial_state + """ + if self.task_name == "": + self.task_name: str = self.cfg.task_cfg.name + + self._raw_actions[env_ids] = 0.0 + + # Custom part from NIST assembly - reset + fixed_asset: Articulation = self._env.scene["fixed_asset"] + robot: Articulation = self._env.scene["robot"] + fixed_pos = fixed_asset.data.root_link_pos_w - self._env.scene.env_origins + fixed_quat = fixed_asset.data.root_link_quat_w + + fixed_tip_pos_local = torch.zeros_like(fixed_pos) + fixed_tip_pos_local[:, 2] += self.cfg.fixed_asset_cfg.height + self.cfg.fixed_asset_cfg.base_height + fixed_tip_pos, _ = math_utils.combine_frame_transforms(fixed_pos, fixed_quat, fixed_tip_pos_local) + + fixed_asset_pos_noise = torch.randn((len(env_ids), 3), dtype=torch.float32, device=self.device) + fixed_asset_pos_rand = torch.tensor(ObsRandCfg.fixed_asset_pos, dtype=torch.float32, device=self.device) + fixed_asset_pos_noise = fixed_asset_pos_noise @ torch.diag(fixed_asset_pos_rand) + self.init_fixed_pos_obs_noise = fixed_asset_pos_noise + fixed_pos_action_frame = fixed_tip_pos + self.init_fixed_pos_obs_noise + fingertip_midpoint_quat = robot.data.body_link_quat_w[:, self._body_idx] + + fingertip_midpoint_pos = robot.data.body_link_pos_w[:, self._body_idx] - self._env.scene.env_origins + + pos_actions = fingertip_midpoint_pos - fixed_pos_action_frame + pos_action_bounds = torch.tensor(CtrlCfg.pos_action_bounds, device=self.device) + pos_actions = pos_actions @ torch.diag(1.0 / pos_action_bounds) + self._raw_actions[:, 0:3] = pos_actions + + # Relative yaw to fixed asset. + unrot_180_euler = torch.tensor([np.pi, 0.0, 0.0], device=self.device).repeat(self.num_envs, 1) + unrot_quat = math_utils.quat_from_euler_xyz( + roll=unrot_180_euler[:, 0], pitch=unrot_180_euler[:, 1], yaw=unrot_180_euler[:, 2] + ) + + fingertip_quat_rel_fixed_asset = math_utils.quat_mul(unrot_quat, fingertip_midpoint_quat) + fingertip_yaw_fixed_asset = math_utils.euler_xyz_from_quat(fingertip_quat_rel_fixed_asset)[-1] + fingertip_yaw_fixed_asset = torch.where( + fingertip_yaw_fixed_asset > torch.pi / 2, + fingertip_yaw_fixed_asset - 2 * torch.pi, + fingertip_yaw_fixed_asset, + ) + fingertip_yaw_fixed_asset = torch.where( + fingertip_yaw_fixed_asset < -torch.pi, fingertip_yaw_fixed_asset + 2 * torch.pi, fingertip_yaw_fixed_asset + ) + + yaw_action = (fingertip_yaw_fixed_asset + np.deg2rad(180.0)) / np.deg2rad(270.0) * 2.0 - 1.0 + self._raw_actions[:, 5] = yaw_action + + self._target_joint_pos_at_reset = robot.data.joint_pos_target.clone() + + """ + Helper functions. + """ + + def _compute_frame_pose(self) -> tuple[torch.Tensor, torch.Tensor]: + """Computes the pose of the target frame in the root frame. + + Returns: + A tuple of the body's position and orientation in the root frame. + """ + # obtain quantities from simulation + ee_pos_w = self._asset.data.body_link_pos_w[:, self._body_idx] + ee_quat_w = self._asset.data.body_link_quat_w[:, self._body_idx] + root_pos_w = self._asset.data.root_link_pos_w + root_quat_w = self._asset.data.root_link_quat_w + # compute the pose of the body in the root frame + ee_pose_b, ee_quat_b = math_utils.subtract_frame_transforms(root_pos_w, root_quat_w, ee_pos_w, ee_quat_w) + # account for the offset + + ee_pose_b, ee_quat_b = math_utils.combine_frame_transforms( + ee_pose_b, ee_quat_b, self._offset_pos, self._offset_rot + ) + + return ee_pose_b, ee_quat_b + + def _compute_frame_jacobian(self): + """Computes the geometric Jacobian of the target frame in the root frame. + + This function accounts for the target frame offset and applies the necessary transformations to obtain + the right Jacobian from the parent body Jacobian. + """ + # read the parent jacobian + jacobian = self.jacobian_b + # account for the offset + # Modify the jacobian to account for the offset + # -- translational part + # v_link = v_ee + w_ee x r_link_ee = v_J_ee * q + w_J_ee * q x r_link_ee + # = (v_J_ee + w_J_ee x r_link_ee ) * q + # = (v_J_ee - r_link_ee_[x] @ w_J_ee) * q + jacobian[:, 0:3, :] += torch.bmm(-math_utils.skew_symmetric_matrix(self._offset_pos), jacobian[:, 3:, :]) + # -- rotational part + # w_link = R_link_ee @ w_ee + jacobian[:, 3:, :] = torch.bmm(math_utils.matrix_from_quat(self._offset_rot), jacobian[:, 3:, :]) + + return jacobian diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/events.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/events.py new file mode 100644 index 0000000..db599c0 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/events.py @@ -0,0 +1,153 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.controllers import DifferentialIKControllerCfg +from isaaclab.envs.mdp.actions.task_space_actions import DifferentialInverseKinematicsAction +from isaaclab.managers import EventTermCfg, ManagerTermBase, SceneEntityCfg +from isaaclab.utils import math as math_utils +from uwlab.envs.mdp.actions.actions_cfg import DifferentialInverseKinematicsActionCfg + +from ..assembly_keypoints import KEYPOINTS_NISTBOARD + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + from ..assembly_keypoints import Offset + +# viz for debug, remove when done debugging +# from isaaclab.markers import FRAME_MARKER_CFG, VisualizationMarkers +# frame_marker_cfg = FRAME_MARKER_CFG.copy() # type: ignore +# frame_marker_cfg.markers["frame"].scale = (0.025, 0.025, 0.025) +# pose_marker = VisualizationMarkers(frame_marker_cfg.replace(prim_path="/Visuals/debug_transform")) + + +def reset_fixed_assets(env: ManagerBasedRLEnv, env_ids: torch.tensor, asset_list: list[str]): + nistboard: RigidObject = env.scene["nistboard"] + for asset_str in asset_list: + asset: Articulation | RigidObject = env.scene[asset_str] + asset_offset_on_nist_board: Offset = getattr(KEYPOINTS_NISTBOARD, asset_str) + asset_on_board_pos, asset_on_board_quat = asset_offset_on_nist_board.apply(nistboard) + root_pose = torch.cat((asset_on_board_pos, asset_on_board_quat), dim=1)[env_ids] + asset.write_root_pose_to_sim(root_pose, env_ids=env_ids) + asset.write_root_velocity_to_sim(torch.zeros_like(asset.data.root_vel_w[env_ids]), env_ids=env_ids) + + +def reset_held_asset( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + holding_body_cfg: SceneEntityCfg, + held_asset_cfg: SceneEntityCfg, + held_asset_graspable_offset: Offset, + held_asset_inhand_range: dict[str, tuple[float, float]], +): + robot: Articulation = env.scene[holding_body_cfg.name] + held_asset: Articulation = env.scene[held_asset_cfg.name] + + end_effector_quat_w = robot.data.body_link_quat_w[env_ids, holding_body_cfg.body_ids].view(-1, 4) + end_effector_pos_w = robot.data.body_link_pos_w[env_ids, holding_body_cfg.body_ids].view(-1, 3) + held_graspable_pos_b = torch.tensor(held_asset_graspable_offset.pos, device=env.device).repeat(len(env_ids), 1) + held_graspable_quat_b = torch.tensor(held_asset_graspable_offset.quat, device=env.device).repeat(len(env_ids), 1) + + flip_z_quat = torch.tensor([[0.0, 0.0, 1.0, 0.0]], device=env.device).repeat(len(env_ids), 1) + translated_held_asset_pos, translated_held_asset_quat = _pose_a_when_frame_ba_aligns_pose_c( + pos_c=end_effector_pos_w, + quat_c=math_utils.quat_mul(end_effector_quat_w, flip_z_quat), + pos_ba=held_graspable_pos_b, + quat_ba=held_graspable_quat_b, + ) + + # Add randomization + range_list = [held_asset_inhand_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] + ranges = torch.tensor(range_list, device=env.device) + samples = math_utils.sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 6), device=env.device) + new_pos_w = translated_held_asset_pos + samples[:, 0:3] + quat_b = math_utils.quat_from_euler_xyz(samples[:, 3], samples[:, 4], samples[:, 5]) + new_quat_w = math_utils.quat_mul(translated_held_asset_quat, quat_b) + + held_asset.write_root_link_pose_to_sim(torch.cat([new_pos_w, new_quat_w], dim=1), env_ids=env_ids) # type: ignore + held_asset.write_root_com_velocity_to_sim(held_asset.data.default_root_state[env_ids, 7:], env_ids=env_ids) # type: ignore + + +def grasp_held_asset( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + robot_cfg: SceneEntityCfg, + held_asset_diameter: float, +) -> None: + robot: Articulation = env.scene[robot_cfg.name] + joint_pos = robot.data.joint_pos[:, robot_cfg.joint_ids][env_ids].clone() + joint_pos[:, :] = held_asset_diameter / 2 * 1.25 + robot.write_joint_state_to_sim(joint_pos, torch.zeros_like(joint_pos), robot_cfg.joint_ids, env_ids) # type: ignore + + +class reset_end_effector_round_fixed_asset(ManagerTermBase): + def __init__(self, cfg: EventTermCfg, env: ManagerBasedRLEnv): + fixed_asset_cfg: SceneEntityCfg = cfg.params.get("fixed_asset_cfg") # type: ignore + fixed_asset_offset: Offset = cfg.params.get("fixed_asset_offset") # type: ignore + pose_range_b: dict[str, tuple[float, float]] = cfg.params.get("pose_range_b") # type: ignore + robot_ik_cfg: SceneEntityCfg = cfg.params.get("robot_ik_cfg", SceneEntityCfg("robot")) + + range_list = [pose_range_b.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] + self.ranges = torch.tensor(range_list, device=env.device) + self.fixed_asset: Articulation | RigidObject = env.scene[fixed_asset_cfg.name] + self.fixed_asset_offset: Offset = fixed_asset_offset + self.robot: Articulation = env.scene[robot_ik_cfg.name] + self.joint_ids: list[int] | slice = robot_ik_cfg.joint_ids + self.robot_ik_solver_cfg = DifferentialInverseKinematicsActionCfg( + asset_name=robot_ik_cfg.name, + joint_names=robot_ik_cfg.joint_names, # type: ignore + body_name=robot_ik_cfg.body_names, # type: ignore + controller=DifferentialIKControllerCfg(command_type="pose", use_relative_mode=False, ik_method="dls"), + scale=1.0, + ) + self.solver: DifferentialInverseKinematicsAction = None # type: ignore + + def __call__( + self, + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + fixed_asset_cfg: SceneEntityCfg, + fixed_asset_offset: Offset, + pose_range_b: dict[str, tuple[float, float]], + robot_ik_cfg: SceneEntityCfg, + ) -> None: + if self.solver is None: + self.solver = self.robot_ik_solver_cfg.class_type(self.robot_ik_solver_cfg, env) + fixed_tip_pos_w, fixed_tip_quat_w = self.fixed_asset_offset.apply(self.fixed_asset) + samples = math_utils.sample_uniform(self.ranges[:, 0], self.ranges[:, 1], (len(env_ids), 6), device=env.device) + pos_b, quat_b = self.solver._compute_frame_pose() + # for those non_reset_id, we will let ik solve for its current position + pos_w = fixed_tip_pos_w[env_ids] + samples[:, 0:3] + quat_w = math_utils.quat_from_euler_xyz(samples[:, 3], samples[:, 4], samples[:, 5]) + pos_b[env_ids], quat_b[env_ids] = math_utils.subtract_frame_transforms( + self.robot.data.root_link_pos_w[env_ids], self.robot.data.root_link_quat_w[env_ids], pos_w, quat_w + ) + self.solver.process_actions(torch.cat([pos_b, quat_b], dim=1)) + n_joints: int = self.robot.num_joints if isinstance(self.joint_ids, slice) else len(self.joint_ids) + # Error Rate 75% ^ 10 = 0.05 (final error) + for i in range(10): + self.solver.apply_actions() + delta_joint_pos = 0.25 * (self.robot.data.joint_pos_target[env_ids] - self.robot.data.joint_pos[env_ids]) + self.robot.write_joint_state_to_sim( + position=(delta_joint_pos + self.robot.data.joint_pos[env_ids])[:, self.joint_ids], + velocity=torch.zeros((len(env_ids), n_joints), device=env.device), + joint_ids=self.joint_ids, + env_ids=env_ids, # type: ignore + ) + + +def _pose_a_when_frame_ba_aligns_pose_c( + pos_c: torch.Tensor, quat_c: torch.Tensor, pos_ba: torch.Tensor, quat_ba: torch.Tensor +) -> tuple[torch.Tensor, torch.Tensor]: + # TA←W ​= {TB←A}-1 ​∘ TC←W​ where ​combine_transform(a,b): b∘a + inv_pos_ba = -math_utils.quat_apply(math_utils.quat_inv(quat_ba), pos_ba) + inv_quat_ba = math_utils.quat_inv(quat_ba) + return math_utils.combine_frame_transforms(pos_c, quat_c, inv_pos_ba, inv_quat_ba) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/observations.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/observations.py new file mode 100644 index 0000000..2009303 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/observations.py @@ -0,0 +1,69 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + from ..assembly_keypoints import Offset + + +def target_asset_pose_in_root_asset_frame( + env: ManagerBasedEnv, + target_asset_cfg: SceneEntityCfg, + root_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + target_asset_offset: Offset | None = None, + root_asset_offset: Offset | None = None, +): + target_asset: RigidObject | Articulation = env.scene[target_asset_cfg.name] + root_asset: RigidObject | Articulation = env.scene[root_asset_cfg.name] + + taget_body_idx = 0 if isinstance(target_asset_cfg.body_ids, slice) else target_asset_cfg.body_ids + root_body_idx = 0 if isinstance(root_asset_cfg.body_ids, slice) else root_asset_cfg.body_ids + + target_pos = target_asset.data.body_link_pos_w[:, taget_body_idx].view(-1, 3) + target_quat = target_asset.data.body_link_quat_w[:, taget_body_idx].view(-1, 4) + root_pos = root_asset.data.body_link_pos_w[:, root_body_idx].view(-1, 3) + root_quat = root_asset.data.body_link_quat_w[:, root_body_idx].view(-1, 4) + + if root_asset_offset is not None: + root_pos, root_quat = root_asset_offset.combine(root_pos, root_quat) + if target_asset_offset is not None: + target_pos, target_quat = target_asset_offset.combine(target_pos, target_quat) + + target_pos_b, target_quat_b = math_utils.subtract_frame_transforms(root_pos, root_quat, target_pos, target_quat) + return torch.cat([target_pos_b, target_quat_b], dim=1) + + +def asset_link_velocity_in_root_asset_frame( + env: ManagerBasedEnv, + target_asset_cfg: SceneEntityCfg, + root_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + target_asset: RigidObject | Articulation = env.scene[target_asset_cfg.name] + root_asset: RigidObject | Articulation = env.scene[root_asset_cfg.name] + + taget_body_idx = 0 if isinstance(target_asset_cfg.body_ids, slice) else target_asset_cfg.body_ids + + asset_lin_vel_b, _ = math_utils.subtract_frame_transforms( + root_asset.data.root_pos_w, + root_asset.data.root_quat_w, + target_asset.data.body_lin_vel_w[:, taget_body_idx].view(-1, 3), + ) + asset_ang_vel_b, _ = math_utils.subtract_frame_transforms( + root_asset.data.root_pos_w, + root_asset.data.root_quat_w, + target_asset.data.body_lin_vel_w[:, taget_body_idx].view(-1, 3), + ) + + return torch.cat([asset_lin_vel_b, asset_ang_vel_b], dim=1) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/rewards.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/rewards.py new file mode 100644 index 0000000..1c5eacc --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/mdp/rewards.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg + +from ..assembly_keypoints import Offset + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +class ProgressContext(ManagerTermBase): + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self.held_asset: Articulation | RigidObject = env.scene[cfg.params.get("held_asset_cfg").name] # type: ignore + self.fixed_asset: Articulation | RigidObject = env.scene[cfg.params.get("fixed_asset_cfg").name] # type: ignore + self.held_asset_offset: Offset = cfg.params.get("held_asset_offset") # type: ignore + self.fixed_asset_offset: Offset = cfg.params.get("fixed_asset_offset") # type: ignore + self.success_threshold: float = cfg.params.get("success_threshold") # type: ignore + + self.orientation_aligned = torch.zeros((env.num_envs), dtype=torch.bool, device=env.device) + self.position_centered = torch.zeros((env.num_envs), dtype=torch.bool, device=env.device) + self.z_distance_reached = torch.zeros((env.num_envs), dtype=torch.bool, device=env.device) + self.euler_xy_diff = torch.zeros((env.num_envs), device=env.device) + self.xy_distance = torch.zeros((env.num_envs), device=env.device) + self.z_distance = torch.zeros((env.num_envs), device=env.device) + + def __call__( + self, + env: ManagerBasedRLEnv, + success_threshold: float, + held_asset_cfg: SceneEntityCfg, + fixed_asset_cfg: SceneEntityCfg, + held_asset_offset: Offset, + fixed_asset_offset: Offset, + ) -> torch.Tensor: + held_asset_alignment_pos_w, held_asset_alignment_quat_w = self.held_asset_offset.apply(self.held_asset) + fixed_asset_alignment_pos_w, fixed_asset_alignment_quat_w = self.fixed_asset_offset.apply(self.fixed_asset) + held_asset_in_fixed_asset_frame_pos, held_asset_in_fixed_asset_frame_quat = ( + math_utils.subtract_frame_transforms( + fixed_asset_alignment_pos_w, + fixed_asset_alignment_quat_w, + held_asset_alignment_pos_w, + held_asset_alignment_quat_w, + ) + ) + + e_x, e_y, _ = math_utils.euler_xyz_from_quat(held_asset_in_fixed_asset_frame_quat) + self.euler_xy_diff[:] = math_utils.wrap_to_pi(e_x).abs() + math_utils.wrap_to_pi(e_y).abs() + self.xy_distance[:] = torch.norm(held_asset_in_fixed_asset_frame_pos[:, 0:2], dim=1) + self.z_distance[:] = held_asset_in_fixed_asset_frame_pos[:, 2] + self.orientation_aligned[:] = self.euler_xy_diff < 0.025 + self.position_centered[:] = self.xy_distance < 0.0025 + self.z_distance_reached[:] = self.z_distance < self.success_threshold + + return torch.zeros(env.num_envs, device=env.device) + + +def orientation_reward(env: ManagerBasedRLEnv, std: float, context: str = "progress_context") -> torch.Tensor: + context_term: ManagerTermBase = env.reward_manager.get_term_cfg(context).func # type: ignore + euler_xy_diff: torch.Tensor = getattr(context_term, "euler_xy_diff") + return 1 - torch.tanh(euler_xy_diff / std) + + +def concentric_reward(env: ManagerBasedRLEnv, std: float, context: str = "progress_context") -> torch.Tensor: + context_term: ManagerTermBase = env.reward_manager.get_term_cfg(context).func # type: ignore + xy_distance: torch.Tensor = getattr(context_term, "xy_distance") + orientation_aligned: torch.Tensor = getattr(context_term, "orientation_aligned") + return torch.where(orientation_aligned, 1 - torch.tanh(xy_distance / std), 0.0) + + +def progress_reward(env: ManagerBasedRLEnv, std: float, context: str = "progress_context") -> torch.Tensor: + context_term: ManagerTermBase = env.reward_manager.get_term_cfg(context).func # type: ignore + orientation_aligned: torch.Tensor = getattr(context_term, "orientation_aligned") + position_centered: torch.Tensor = getattr(context_term, "position_centered") + z_distance: torch.Tensor = getattr(context_term, "z_distance") + return torch.where(orientation_aligned & position_centered, 1 - torch.tanh(z_distance / std), 0.0) + + +def success_reward(env: ManagerBasedRLEnv, context: str = "progress_context") -> torch.Tensor: + context_term: ManagerTermBase = env.reward_manager.get_term_cfg(context).func # type: ignore + orientation_aligned: torch.Tensor = getattr(context_term, "orientation_aligned") + position_centered: torch.Tensor = getattr(context_term, "position_centered") + z_distance_reached: torch.Tensor = getattr(context_term, "z_distance_reached") + return torch.where(orientation_aligned & position_centered & z_distance_reached, 1.0, 0.0) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/nutthread_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/nutthread_env_cfg.py new file mode 100644 index 0000000..958f999 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/nutthread_env_cfg.py @@ -0,0 +1,88 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +from .assembly_keypoints import KEYPOINTS_BOLTM16, KEYPOINTS_NUTM16 +from .factory_env_base import FactoryBaseEnvCfg, FactoryEventCfg, FactoryObservationsCfg, FactoryRewardsCfg + + +@configclass +class NutThreadObservationsCfg(FactoryObservationsCfg): + def __post_init__(self): + # policy + self.policy.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_cfg"] = SceneEntityCfg("bolt_m16") + self.policy.fixed_asset_in_end_effector_frame.params["root_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_offset"] = KEYPOINTS_BOLTM16.bolt_tip_offset + + # critic + self.critic.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.held_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("nut_m16") + self.critic.fixed_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("bolt_m16") + self.critic.held_asset_in_fixed_asset_frame.params["target_asset_cfg"] = SceneEntityCfg("nut_m16") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_cfg"] = SceneEntityCfg("bolt_m16") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_offset"] = KEYPOINTS_BOLTM16.bolt_tip_offset + + +@configclass +class NutThreadEventCfg(FactoryEventCfg): + def __post_init__(self): + # For asset_material + self.held_asset_material.params["asset_cfg"] = SceneEntityCfg("nut_m16") + self.fixed_asset_material.params["asset_cfg"] = SceneEntityCfg("bolt_m16") + + # For reset_fixed_asset + self.reset_fixed_asset.params["asset_list"] = ["bolt_m16"] + + # For reset_hand + self.reset_end_effector_around_fixed_asset.params["fixed_asset_cfg"] = SceneEntityCfg("bolt_m16") + self.reset_end_effector_around_fixed_asset.params["fixed_asset_offset"] = KEYPOINTS_BOLTM16.bolt_tip_offset + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].joint_names = ["panda_joint.*"] + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].body_names = "panda_fingertip_centered" + self.reset_end_effector_around_fixed_asset.params["pose_range_b"] = { + "x": (-0.02, 0.02), + "y": (-0.02, 0.02), + "z": (0.015, 0.025), + "roll": (3.141, 3.141), + "yaw": (1.57, 2.09), + } + + # For reset_held_asset + self.reset_held_asset_in_hand.params["holding_body_cfg"].body_names = "panda_fingertip_centered" + self.reset_held_asset_in_hand.params["held_asset_cfg"] = SceneEntityCfg("nut_m16") + self.reset_held_asset_in_hand.params["held_asset_graspable_offset"] = KEYPOINTS_NUTM16.grasp_point + + # For grasp_held_assset + self.grasp_held_asset.params["robot_cfg"].body_names = "panda_fingertip_centered" + self.grasp_held_asset.params["robot_cfg"].joint_names = "panda_finger_joint[1-2]" + self.grasp_held_asset.params["held_asset_diameter"] = KEYPOINTS_NUTM16.grasp_diameter + + +@configclass +class NutThreadRewardsCfg(FactoryRewardsCfg): + def __post_init__(self): + # For progress_context + self.progress_context.params["fixed_asset_cfg"] = SceneEntityCfg("bolt_m16") + self.progress_context.params["held_asset_cfg"] = SceneEntityCfg("nut_m16") + self.progress_context.params["held_asset_offset"] = KEYPOINTS_NUTM16.center_axis_bottom + self.progress_context.params["fixed_asset_offset"] = KEYPOINTS_BOLTM16.screwed_nut_offset + + +@configclass +class NutThreadEnvCfg(FactoryBaseEnvCfg): + """Configuration for the NutThread environment.""" + + observations: NutThreadObservationsCfg = NutThreadObservationsCfg() + events: NutThreadEventCfg = NutThreadEventCfg() + rewards: NutThreadRewardsCfg = NutThreadRewardsCfg() + + def __post_init__(self): + super().__post_init__() + for asset in ["gear_base", "hole_8mm", "small_gear", "large_gear", "medium_gear", "peg_8mm"]: + delattr(self.scene, asset) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/peginsert_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/peginsert_env_cfg.py new file mode 100644 index 0000000..54a55d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/factory_extension/peginsert_env_cfg.py @@ -0,0 +1,88 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +from .assembly_keypoints import KEYPOINTS_HOLE8MM, KEYPOINTS_PEG8MM +from .factory_env_base import FactoryBaseEnvCfg, FactoryEventCfg, FactoryObservationsCfg, FactoryRewardsCfg + + +@configclass +class PegInsertObservationsCfg(FactoryObservationsCfg): + def __post_init__(self): + # policy + self.policy.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_cfg"] = SceneEntityCfg("hole_8mm") + self.policy.fixed_asset_in_end_effector_frame.params["root_asset_cfg"].body_names = "panda_fingertip_centered" + self.policy.fixed_asset_in_end_effector_frame.params["target_asset_offset"] = KEYPOINTS_HOLE8MM.hole_tip_offset + + # critic + self.critic.end_effector_pose.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.end_effector_vel_lin_ang_b.params["target_asset_cfg"].body_names = "panda_fingertip_centered" + self.critic.held_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("peg_8mm") + self.critic.fixed_asset_pose.params["target_asset_cfg"] = SceneEntityCfg("hole_8mm") + self.critic.held_asset_in_fixed_asset_frame.params["target_asset_cfg"] = SceneEntityCfg("peg_8mm") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_cfg"] = SceneEntityCfg("hole_8mm") + self.critic.held_asset_in_fixed_asset_frame.params["root_asset_offset"] = KEYPOINTS_HOLE8MM.hole_tip_offset + + +@configclass +class PegInsertEventCfg(FactoryEventCfg): + def __post_init__(self): + # For asset_material + self.held_asset_material.params["asset_cfg"] = SceneEntityCfg("peg_8mm") + self.fixed_asset_material.params["asset_cfg"] = SceneEntityCfg("hole_8mm") + + # For reset_fixed_asset + self.reset_fixed_asset.params["asset_list"] = ["hole_8mm"] + + # For reset_end_effector_around_fixed_asset + self.reset_end_effector_around_fixed_asset.params["fixed_asset_cfg"] = SceneEntityCfg("hole_8mm") + self.reset_end_effector_around_fixed_asset.params["fixed_asset_offset"] = KEYPOINTS_HOLE8MM.hole_tip_offset + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].joint_names = ["panda_joint.*"] + self.reset_end_effector_around_fixed_asset.params["robot_ik_cfg"].body_names = "panda_fingertip_centered" + self.reset_end_effector_around_fixed_asset.params["pose_range_b"] = { + "x": (-0.02, 0.02), + "y": (-0.02, 0.02), + "z": (0.047, 0.057), + "roll": (3.141, 3.141), + "yaw": (-0.785, 0.785), + } + + # For reset_held_asset + self.reset_held_asset_in_hand.params["holding_body_cfg"].body_names = "panda_fingertip_centered" + self.reset_held_asset_in_hand.params["held_asset_cfg"] = SceneEntityCfg("peg_8mm") + self.reset_held_asset_in_hand.params["held_asset_graspable_offset"] = KEYPOINTS_PEG8MM.grasp_point + + # For grasp_held_assset + self.grasp_held_asset.params["robot_cfg"].body_names = "panda_fingertip_centered" + self.grasp_held_asset.params["robot_cfg"].joint_names = "panda_finger_joint[1-2]" + self.grasp_held_asset.params["held_asset_diameter"] = KEYPOINTS_PEG8MM.grasp_diameter + + +@configclass +class PegInsertRewardsCfg(FactoryRewardsCfg): + def __post_init__(self): + # For progress_context + self.progress_context.params["fixed_asset_cfg"] = SceneEntityCfg("hole_8mm") + self.progress_context.params["held_asset_cfg"] = SceneEntityCfg("peg_8mm") + self.progress_context.params["held_asset_offset"] = KEYPOINTS_PEG8MM.center_axis_bottom + self.progress_context.params["fixed_asset_offset"] = KEYPOINTS_HOLE8MM.inserted_peg_base_offset + + +@configclass +class PegInsertEnvCfg(FactoryBaseEnvCfg): + """Configuration for the PegInsert environment.""" + + observations: PegInsertObservationsCfg = PegInsertObservationsCfg() + events: PegInsertEventCfg = PegInsertEventCfg() + rewards: PegInsertRewardsCfg = PegInsertRewardsCfg() + + def __post_init__(self): + super().__post_init__() + for asset in ["bolt_m16", "gear_base", "small_gear", "large_gear", "medium_gear", "nut_m16"]: + delattr(self.scene, asset) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/__init__.py new file mode 100644 index 0000000..fcdf886 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Track Goal Environments.""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/__init__.py new file mode 100644 index 0000000..13cbbf8 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for the track goal environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/__init__.py new file mode 100644 index 0000000..def39f9 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/__init__.py @@ -0,0 +1,44 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +""" +Leap +""" +gym.register( + id="UW-TrackGoal-Leap-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_leap:TrackGoalLeapJointPosition", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) + + +gym.register( + id="UW-TrackGoal-Leap-IkAbs-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_leap:TrackGoalLeapMcIkAbs", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + "teleop_cfg_entry_point": "uwlab_assets.robots.leap.teleop:LeapTeleopCfg", + }, + disable_env_checker=True, +) + + +gym.register( + id="UW-TrackGoal-Leap-IkDel-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_leap:TrackGoalLeapMcIkDel", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..3e7865f --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/agents/rsl_rl_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class Base_PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 + max_iterations = 1500 + save_interval = 50 + resume = False + experiment_name = "track_goal_agent" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/track_goal_leap.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/track_goal_leap.py new file mode 100644 index 0000000..17e2b9d --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/leap/track_goal_leap.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.leap as leap + +from isaaclab.utils import configclass + +from ... import track_goal_env + +episode_length = 50.0 + + +@configclass +class TrackGoalLeap(track_goal_env.TrackGoalEnv): + def __post_init__(self): + super().__post_init__() + self.scene.robot = leap.IMPLICIT_LEAP6D.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.commands.ee_pose.body_name = "palm_lower" + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_orientation_tracking_fine_grained.params["asset_cfg"].body_names = "palm_lower" + + +@configclass +class TrackGoalLeapJointPosition(TrackGoalLeap): + actions = leap.LeapJointPositionAction() # type: ignore + + +@configclass +class TrackGoalLeapMcIkAbs(TrackGoalLeap): + actions = leap.LeapMcIkAbsoluteAction() # type: ignore + + +@configclass +class TrackGoalLeapMcIkDel(TrackGoalLeap): + actions = leap.LeapMcIkDeltaAction() # type: ignore diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/__init__.py new file mode 100644 index 0000000..3a1db74 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +gym.register( + id="UW-Track-Goal-Tycho-IkDel-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.tycho_track_goal:GoalTrackingTychoIkdelta", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-Track-Goal-Tycho-IkAbs-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.tycho_track_goal:GoalTrackingTychoIkabsolute", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + "teleop_cfg_entry_point": "uwlab_assets.robots.tycho.teleop:TychoTeleopCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-Track-Goal-Tycho-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.tycho_track_goal:GoalTrackingTychoJointPosition", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..1c92263 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/agents/rsl_rl_cfg.py @@ -0,0 +1,78 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class Base_PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 96 + max_iterations = 1500 + save_interval = 50 + resume = False + experiment_name = "hebi_base_agent" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +@configclass +class IdealPdScalePPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "idealPd_scale_experiment" + + +@configclass +class Strategy4ScalePPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "strategy4_scale_experiment" + + +@configclass +class Strategy3ScalePPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "strategy3_scale_experiment" + + +@configclass +class AgentUpdateRatePPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "agent_update_rate_experiment" + + +@configclass +class DecimationPPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "decimation_experiment" + + +@configclass +class Hebi_IkAbsoluteDls_PwmMotor_AbsoluteGoalTracking_PPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "Hebi_IkAbsoluteDls_PwmMotor_AbsoluteGoalTracking" + + +@configclass +class Hebi_IkDeltaDls_PwmMotor_DeltaGoalTracking_EnvPPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "Hebi_IkDeltaDls_PwmMotor_DeltaGoalTracking" + + +@configclass +class Hebi_IkAbsoluteDls_IdealPD_AbsoluteGoalTracking_EnvPPORunnerCfg(Base_PPORunnerCfg): + experiment_name = "Hebi_IkAbsoluteDls_IdealPD_AbsoluteGoalTracking" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/tycho_track_goal.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/tycho_track_goal.py new file mode 100644 index 0000000..5e56739 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/tycho/tycho_track_goal.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.tycho as tycho +import uwlab_assets.robots.tycho.mdp as tycho_mdp + +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass + +import uwlab_tasks.manager_based.manipulation.track_goal.mdp as mdp + +from ... import track_goal_env + + +class SceneCfg(track_goal_env.SceneCfg): + ee_frame = tycho.FRAME_EE + robot = tycho.HEBI_IMPLICIT_ACTUATOR_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + frame_fixed_chop_tip = tycho.FRAME_FIXED_CHOP_TIP + frame_free_chop_tip = tycho.FRAME_FREE_CHOP_TIP + + +@configclass +class TerminationsCfg(track_goal_env.TerminationsCfg): + """Termination terms for the MDP.""" + + robot_invalid_state = DoneTerm(func=mdp.invalid_state, params={"asset_cfg": SceneEntityCfg("robot")}) + + robot_extremely_bad_posture = DoneTerm( + func=tycho_mdp.terminate_extremely_bad_posture, + params={"probability": 0.5, "robot_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class GoalTrackingTychoEnv(track_goal_env.TrackGoalEnv): + scene: SceneCfg = SceneCfg(num_envs=1, env_spacing=2.5, replicate_physics=False) + terminations: TerminationsCfg = TerminationsCfg() + + def __post_init__(self): + super().__post_init__() + self.decimation = 4 + self.sim.dt = 0.02 / self.decimation + self.commands.ee_pose.body_name = "static_chop_tip" + self.commands.ee_pose.ranges.pos_x = (-0.45, 0.0) + self.commands.ee_pose.ranges.pos_y = (-0.325, -0.1) + self.commands.ee_pose.ranges.pos_z = (0.05, 0.3) + self.commands.ee_pose.ranges.roll = (1.57, 1.57) + self.commands.ee_pose.ranges.pitch = (3.14, 3.14) + self.commands.ee_pose.ranges.yaw = (-1.0, 1.0) + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = "static_chop_tip" + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = "static_chop_tip" + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = "static_chop_tip" + self.rewards.end_effector_orientation_tracking_fine_grained.params["asset_cfg"].body_names = "static_chop_tip" + + +@configclass +class GoalTrackingTychoIkdelta(GoalTrackingTychoEnv): + actions: tycho.IkdeltaAction = tycho.IkdeltaAction() + + +@configclass +class GoalTrackingTychoIkabsolute(GoalTrackingTychoEnv): + actions: tycho.IkabsoluteAction = tycho.IkabsoluteAction() + + +@configclass +class GoalTrackingTychoJointPosition(GoalTrackingTychoEnv): + actions: tycho.JointPositionAction = tycho.JointPositionAction() diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/__init__.py new file mode 100644 index 0000000..7fbc864 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +gym.register( + id="UW-Track-Goal-Ur5-IkAbs-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_ur5_env_cfg:TrackGoalUr5IkAbsEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + "teleop_cfg_entry_point": "uwlab_assets.robots.ur5.teleop:Ur5TeleopCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-Track-Goal-Ur5-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_ur5_env_cfg:TrackGoalUr5JointPositionEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..f4ac2fa --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/agents/rsl_rl_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class Base_PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 96 + max_iterations = 1500 + save_interval = 50 + resume = False + experiment_name = "track_goal_base_agent" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/track_goal_ur5_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/track_goal_ur5_env_cfg.py new file mode 100644 index 0000000..311a51b --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/ur5/track_goal_ur5_env_cfg.py @@ -0,0 +1,64 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.ur5 as ur5 + +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass + +from ... import mdp, track_goal_env + + +@configclass +class Ur5SceneCfg(track_goal_env.SceneCfg): + robot = ur5.IMPLICIT_UR5.replace(prim_path="{ENV_REGEX_NS}/Robot") + + +@configclass +class Ur5TermincationCfg(track_goal_env.TerminationsCfg): + robot_invalid_state = DoneTerm(func=mdp.abnormal_robot_state) + + +@configclass +class TrackGoalUr5EnvCfg(track_goal_env.TrackGoalEnv): + scene: Ur5SceneCfg = Ur5SceneCfg(num_envs=1, env_spacing=2.5, replicate_physics=False) + terminations: Ur5TermincationCfg = Ur5TermincationCfg() + + def __post_init__(self): + # simulation settings + super().__post_init__() + self.decimation = 4 + self.sim.dt = 0.02 / self.decimation + self.viewer.eye = (3.0, 3.0, 1.0) + + # Contact and solver settings + self.sim.physx.solver_type = 1 + # Render settings + self.sim.render.enable_dlssg = True + self.sim.render.enable_ambient_occlusion = True + self.sim.render.enable_reflections = True + self.sim.render.enable_dl_denoiser = True + self.commands.ee_pose.body_name = "robotiq_base_link" + self.commands.ee_pose.ranges.pos_x = (-0.5, -0.25) + self.commands.ee_pose.ranges.pitch = (0.5, 1.57) + self.commands.ee_pose.ranges.yaw = (1.57, 4.61) + + self.events.reset_robot_joint = None # don't reset robotiq joints, it may cause robot to fail + + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = "robotiq_base_link" + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = "robotiq_base_link" + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = "robotiq_base_link" + self.rewards.end_effector_orientation_tracking_fine_grained.params["asset_cfg"].body_names = "robotiq_base_link" + self.rewards.end_effector_orientation_tracking.weight *= 2.0 + + +@configclass +class TrackGoalUr5IkAbsEnvCfg(TrackGoalUr5EnvCfg): + actions: ur5.Ur5IkAbsoluteAction = ur5.Ur5IkAbsoluteAction() + + +@configclass +class TrackGoalUr5JointPositionEnvCfg(TrackGoalUr5EnvCfg): + actions: ur5.Ur5JointPositionAction = ur5.Ur5JointPositionAction() diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/__init__.py new file mode 100644 index 0000000..0610339 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/__init__.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +""" +XarmLeap +""" +gym.register( + id="UW-TrackGoal-XarmLeap-JointPos-Deployment-v0", + entry_point="uwlab.envs.real_rl_env:RealRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_xarm_leap:RealRLEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-TrackGoal-XarmLeap-JointPos-Viz-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_xarm_leap:TrackGoalXarmLeapVizJointPosition", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-TrackGoal-XarmLeap-JointPos-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_xarm_leap:TrackGoalXarmLeapJointPosition", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="UW-TrackGoal-XarmLeap-IkAbs-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_xarm_leap:TrackGoalXarmLeapMcIkAbs", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + "teleop_cfg_entry_point": "uwlab_assets.robots.xarm_leap.teleop:XarmLeapTeleopCfg", + }, + disable_env_checker=True, +) + + +gym.register( + id="UW-TrackGoal-XarmLeap-IkDel-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.track_goal_xarm_leap:TrackGoalXarmLeapMcIkDel", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:Base_PPORunnerCfg", + }, + disable_env_checker=True, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..3e7865f --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/agents/rsl_rl_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class Base_PPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 + max_iterations = 1500 + save_interval = 50 + resume = False + experiment_name = "track_goal_agent" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap.py new file mode 100644 index 0000000..dab86be --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap.py @@ -0,0 +1,215 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.leap as leap +import uwlab_assets.robots.leap.mdp as leap_mdp +import uwlab_assets.robots.xarm_leap as xarm_leap +from uwlab_assets import UWLAB_CLOUD_ASSETS_DIR + +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass +from uwlab.envs.mdp.actions import VisualizableJointTargetPositionCfg + +import uwlab_tasks.manager_based.manipulation.track_goal.mdp as mdp + +from ... import track_goal_env + + +@configclass +class SceneCfg(track_goal_env.SceneCfg): + robot = xarm_leap.IMPLICIT_XARM_LEAP.replace(prim_path="{ENV_REGEX_NS}/Robot") + + hand_command_articulation_vis = leap.IMPLICIT_LEAP.replace( + prim_path="{ENV_REGEX_NS}/HANDVIS", + spawn=leap.IMPLICIT_LEAP.spawn.replace( + usd_path=f"{UWLAB_CLOUD_ASSETS_DIR}/Robots/LeapHand/leap_transparent.usd", + ), + ) + + xarm_leap_action_vis = xarm_leap.IMPLICIT_XARM_LEAP.replace( + prim_path="{ENV_REGEX_NS}/ROBOTACTIONVIZ", + spawn=xarm_leap.IMPLICIT_XARM_LEAP.spawn.replace( + usd_path=f"{UWLAB_CLOUD_ASSETS_DIR}/Robots/UFactory/Xarm5LeapHand/leap_xarm_visual.usd", + ), + init_state=xarm_leap.IMPLICIT_XARM_LEAP.init_state.replace(pos=(0.0, 0.0, 0.0)), + ) + + +@configclass +class EventCfg: + randomize_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot"), + "mass_distribution_params": (0.9, 1.1), + "operation": "scale", + }, + ) + + robot_joint_stiffness_and_damping = EventTerm( + func=mdp.randomize_actuator_gains, + min_step_count_between_reset=720, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stiffness_distribution_params": (0.75, 1.5), + "damping_distribution_params": (0.3, 3.0), + "operation": "scale", + "distribution": "log_uniform", + }, + ) + + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}}, + ) + + reset_robot_joint = EventTerm( + func=mdp.reset_joints_by_scale, + params={ + "asset_cfg": SceneEntityCfg("robot"), + "position_range": (-0.5, 0.5), + "velocity_range": (-0.1, 0.1), + }, + mode="reset", + ) + + +@configclass +class ObservationCfg(track_goal_env.ObservationsCfg): + @configclass + class PolicyCfg(track_goal_env.ObservationsCfg.PolicyCfg): + """Observations for policy group.""" + + hand_target = ObsTerm(func=mdp.generated_commands, params={"command_name": "hand_posture"}) + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class RewardCfg(track_goal_env.RewardsCfg): + hand_joint_command_tracking = RewTerm( + func=mdp.joint_position_command_error_l2_norm, + weight=-1.5, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names="j[0-9]+"), + "command_name": "hand_posture", + }, + ) + + stay_still = RewTerm( + func=mdp.stay_still, + weight=-0.01, + params={ + "ee_command_name": "ee_pose", + "hand_command_name": "hand_posture", + "hand_asset_cfg": SceneEntityCfg("robot", joint_names="j[0-9]+"), + "ee_asset_cfg": SceneEntityCfg("robot", body_names="palm_lower"), + }, + ) + + delta_action_l2 = RewTerm( + func=mdp.delta_action_l2, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class CommandCfg(track_goal_env.CommandsCfg): + hand_posture = mdp.HandJointCommandCfg( + debug_vis=True, + wrist_pose_term="ee_pose", + asset_cfg=SceneEntityCfg("robot", joint_names=["j[0-9]+"]), + articulation_vis_cfg=SceneEntityCfg("hand_command_articulation_vis", joint_names=["j[0-9]+"]), + resampling_time_range=(1.5, 2.5), + predefined_hand_joint_goals=[ + [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00], + [1.25, 0.50, 0.60, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00], + [0.00, 0.50, 0.75, 1.25, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00], + [1.25, 0.50, 0.60, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00, 0.00, 1.00, 1.00, 0.00, 0.00, 0.00, 0.00], + [0.00, 0.50, 0.75, 1.25, 0.00, 0.00, 0.00, 0.00, 1.00, 0.00, 1.00, 1.00, 0.00, 0.00, 0.00, 0.00], + ], + ) + + +@configclass +class TrackGoalXarmLeap(track_goal_env.TrackGoalEnv): + scene: SceneCfg = SceneCfg(num_envs=1, env_spacing=2.5, replicate_physics=False) + events: EventCfg = EventCfg() + observations: ObservationCfg = ObservationCfg() + commands: CommandCfg = CommandCfg() + rewards: RewardCfg = RewardCfg() + + def __post_init__(self): + super().__post_init__() + self.decimation = 5 + self.sim.dt = 0.02 / self.decimation + self.commands.ee_pose.body_name = "palm_lower" + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = "palm_lower" + self.rewards.end_effector_orientation_tracking_fine_grained.params["asset_cfg"].body_names = "palm_lower" + self.rewards.action_rate.weight *= 5.0 + self.rewards.end_effector_orientation_tracking.weight *= 2.0 + self.rewards.end_effector_position_tracking.weight *= 1.5 + self.rewards.end_effector_position_tracking_fine_grained.weight *= 1.5 + if not self.commands.hand_posture.debug_vis: + self.scene.hand_command_articulation_vis = None + if not hasattr(self.actions, "jointpos_viz") or (self.actions.jointpos_viz.debug_vis is False): + self.scene.xarm_leap_action_vis = None + if self.commands.hand_posture.debug_vis or ( + hasattr(self.actions, "jointpos_viz") and self.actions.jointpos_viz.debug_vis + ): + # this is necessary to visualize opacity in the raytracing + import carb + import isaacsim.core.utils.carb as carb_utils + + self.sim.render.enable_translucency = True + # # Access the Carb settings registry + settings = carb.settings.get_settings() + carb_utils.set_carb_setting(settings, "/rtx/raytracing/fractionalCutoutOpacity", True) + + +@configclass +class TargetVizableJointPositionAction(xarm_leap.XarmLeapJointPositionAction): + jointpos_viz: VisualizableJointTargetPositionCfg = VisualizableJointTargetPositionCfg( + debug_vis=False, + asset_name="robot", + joint_names=["joint.*", "j[0-9]+"], + articulation_vis_cfg=SceneEntityCfg("xarm_leap_action_vis"), + ) + + leap_action_correction = leap_mdp.LeapJointPositionActionCorrectionCfg( + asset_name="robot", + joint_names=["joint.*", "j[0-9]+"], + ) + + +@configclass +class TrackGoalXarmLeapVizJointPosition(TrackGoalXarmLeap): + actions: TargetVizableJointPositionAction = TargetVizableJointPositionAction() # type: ignore + + +@configclass +class TrackGoalXarmLeapJointPosition(TrackGoalXarmLeap): + actions = xarm_leap.XarmLeapJointPositionAction() # type: ignore + + +@configclass +class TrackGoalXarmLeapMcIkAbs(TrackGoalXarmLeap): + actions = xarm_leap.XarmLeapMcIkAbsoluteAction() # type: ignore + + +@configclass +class TrackGoalXarmLeapMcIkDel(TrackGoalXarmLeap): + actions = xarm_leap.XarmLeapMcIkDeltaAction() # type: ignore diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap_deployment.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap_deployment.py new file mode 100644 index 0000000..e879098 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/config/xarm_leap/track_goal_xarm_leap_deployment.py @@ -0,0 +1,203 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import torch + +from uwlab_assets.robots.leap.articulation_drive.dynamixel_driver_cfg import DynamixelDriverCfg +from uwlab_assets.robots.xarm.articulation_drive.xarm_driver_cfg import XarmDriverCfg + +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.envs.mdp.actions.actions_cfg import JointPositionActionCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass +from uwlab.assets import ArticulationCfg +from uwlab.assets.articulation import BulletArticulationViewCfg +from uwlab.envs import RealRLEnvCfg +from uwlab.scene import SceneContextCfg + +import uwlab_tasks.manager_based.manipulation.track_goal.mdp as mdp + + +@configclass +class SceneCfg(SceneContextCfg): + robot = ArticulationCfg( + articulation_view_cfg=BulletArticulationViewCfg( + drive_cfg=XarmDriverCfg( + ip="192.168.1.220", + work_space_limit=[[0.2, 0.6], [-0.325, 0.325], [0.15, 0.5]], + is_radian=True, + p_gain_scaler=0.01, + ), + isaac_joint_names=["joint1", "joint2", "joint3", "joint4", "joint5"], + urdf="/path/to/xarm5.urdf", + dt=0.01, # recommended hz is 30 - 300hz + debug_visualize=False, + # dummy_mode=True, + use_multiprocessing=True, + ), + init_state=ArticulationCfg.InitialStateCfg(joint_pos={"joint1": 0.0, "joint3": -0.5}), + actuators={"xarm": ImplicitActuatorCfg(joint_names_expr=["joint.*"], stiffness=0.0, damping=0.0)}, + ) + + hand = ArticulationCfg( + articulation_view_cfg=BulletArticulationViewCfg( + drive_cfg=DynamixelDriverCfg(port="/dev/ttyUSB0"), + urdf="/path/to/leap.urdf", + # fmt: off + isaac_joint_names=[ + 'j1', 'j12', 'j5', 'j9', 'j0', 'j13', 'j4', 'j8', 'j2', 'j14', 'j6', 'j10', 'j3', 'j15', 'j7', 'j11' + ], + # fmt: on + debug_visualize=False, + use_multiprocessing=True, + dt=0.01, + # dummy_mode=True, + ), + init_state=ArticulationCfg.InitialStateCfg(joint_pos={"j.*": 0.0}), + actuators={ + "mcp_joints": ImplicitActuatorCfg(joint_names_expr=["j[048]"], stiffness=50, damping=50), + "other_joints": ImplicitActuatorCfg(joint_names_expr=["j(?![048]).*"], stiffness=80, damping=50), + }, + ) + + +@configclass +class ActionCfg: + xarm_joint_position_action = JointPositionActionCfg( + asset_name="robot", joint_names=["joint.*"], scale=1.0, use_default_offset=False + ) + + leap_hand_joint_position_action = JointPositionActionCfg( + asset_name="hand", joint_names=["j.*"], scale=1.0, use_default_offset=False + ) + + +@configclass +class ObservationsCfg: + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + xarm_joint_pos = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}) + hand_joint_pos = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("hand")}) + + xarm_joint_vel = ObsTerm(func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}) + hand_joint_vel = ObsTerm(func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("hand")}) + + target_object_position = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"}) + actions = ObsTerm(func=mdp.last_action) + # hand_target = ObsTerm(func=mdp.generated_commands, params={"command_name": "hand_posture"}) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name="link_eef", + resampling_time_range=(1.5, 2.5), + debug_vis=False, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.3, 0.5), + pos_y=(-0.225, 0.225), + pos_z=(0.125, 0.4), + roll=(0, 0.0), + pitch=(0, 1), + yaw=(0.0, 1), + ), + ) + + +@configclass +class EventCfg: + reset_robot_joint = EventTerm( + func=mdp.reset_joints_by_offset, + params={ + "asset_cfg": SceneEntityCfg("robot"), + "position_range": [0.0, 0.0], + "velocity_range": [0.0, 0.0], + }, + mode="reset", + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + pass + # action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01) + + # joint_vel = RewTerm( + # func=mdp.joint_vel_l2, + # weight=-0.0001, + # params={"asset_cfg": SceneEntityCfg("robot")}, + # ) + + # end_effector_position_tracking = RewTerm( + # func=mdp.link_position_command_align_tanh, + # weight=0.5, + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + # "std": 0.5, + # "command_name": "ee_pose", + # }, + # ) + + # end_effector_position_tracking_fine_grained = RewTerm( + # func=mdp.link_position_command_align_tanh, + # weight=1, + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + # "std": 0.1, + # "command_name": "ee_pose", + # }, + # ) + + # end_effector_orientation_tracking = RewTerm( + # func=mdp.link_orientation_command_align_tanh, + # weight=0.5, + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + # "std": 0.5, + # "command_name": "ee_pose", + # }, + # ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class TrackGoalXarmLeapDeployment(RealRLEnvCfg): + device: str = "cuda:0" if torch.cuda.is_available() else "cpu" + scene: SceneCfg = SceneCfg() + actions: ActionCfg = ActionCfg() + commands: CommandsCfg = CommandsCfg() + observations: ObservationsCfg = ObservationsCfg() + events: EventCfg = EventCfg() + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + + def __post_init__(self): + self.decimation = 1 + self.episode_length_s = 50 + self.scene.dt = 0.02 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/__init__.py new file mode 100644 index 0000000..0d89a2a --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.envs.mdp import * +from uwlab.envs.mdp import * + +from .command_cfg import HandJointCommandCfg +from .rewards import * diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command.py new file mode 100644 index 0000000..f8e0155 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command.py @@ -0,0 +1,118 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-module containing command generators for hand joint tracking.""" + +from __future__ import annotations + +import torch +from collections.abc import Sequence +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation +from isaaclab.managers import CommandTerm +from isaaclab.utils.math import combine_frame_transforms + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + from .command_cfg import HandJointCommandCfg + + +class HandJointCommand(CommandTerm): + cfg: HandJointCommandCfg + + def __init__(self, cfg: HandJointCommandCfg, env: ManagerBasedRLEnv): + """Initialize the command generator class. + + Args: + cfg: The configuration parameters for the command generator. + env: The environment object. + """ + # initialize the base class + self.env = env + super().__init__(cfg, env) + # extract the robot and body index for which the command is generated + self.robot: Articulation = env.scene[cfg.asset_cfg.name] + self.joint_indices = self.robot.find_joints(cfg.asset_cfg.joint_names)[0] # type: ignore + self.predefined_hand_joint_goals = torch.tensor(cfg.predefined_hand_joint_goals, device=self.device) + self.current_joint_goal = torch.zeros((self.num_envs, len(self.joint_indices)), device=self.device) + + r = torch.randint(0, len(self.predefined_hand_joint_goals), (self.num_envs,)) + self.current_joint_goal[:] = self.predefined_hand_joint_goals[r] + # -- metrics + self.metrics["joint_position_error"] = torch.zeros(self.num_envs, device=self.device) + + def __str__(self) -> str: + msg = "UniformPoseCommand:\n" + msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" + msg += f"\tResampling time range: {self.cfg.resampling_time_range}\n" + return msg + + """ + Properties + """ + + @property + def command(self) -> torch.Tensor: + """The desired pose command. Shape is (num_envs, 7). + + The first three elements correspond to the position, followed by the quaternion orientation in (w, x, y, z). + """ + return self.current_joint_goal + + """ + Implementation specific functions. + """ + + def _update_metrics(self): + # transform command from base frame to simulation world frame + current_joint_pos = self.robot.data.joint_pos[:, self.joint_indices] + self.metrics["position_error"] = torch.norm(self.current_joint_goal - current_joint_pos, dim=-1) + + def _resample_command(self, env_ids: Sequence[int]): + # sample new joint targets + + r = torch.randint(0, len(self.predefined_hand_joint_goals), (len(env_ids),)) + self.current_joint_goal[env_ids] = self.predefined_hand_joint_goals[r] + + def _update_command(self): + pass + + def _set_debug_vis_impl(self, debug_vis: bool): + # create markers if necessary for the first tome + import isaacsim.core.utils.prims as prim_utils + from pxr import UsdGeom + + if debug_vis: + if not hasattr(self, "vis_articulation"): + if self.cfg.articulation_vis_cfg.name in self.env.scene.keys(): # noqa: SIM118 + self.vis_articulation: Articulation = self.env.scene[self.cfg.articulation_vis_cfg.name] + prims_paths = prim_utils.find_matching_prim_paths(self.vis_articulation.cfg.prim_path) + prims = [prim_utils.get_prim_at_path(prim) for prim in prims_paths] + for prim in prims: + UsdGeom.Imageable(prim).MakeVisible() + # VisualizationMarkers + else: + if hasattr(self, "vis_articulation"): + prims_paths = prim_utils.find_matching_prim_paths(self.vis_articulation.cfg.prim_path) + prims = [prim_utils.get_prim_at_path(prim) for prim in prims_paths] + for prim in prims: + UsdGeom.Imageable(prim).MakeInvisible() + + def _debug_vis_callback(self, event): + # update the box marker + writ_pose_command: CommandTerm = self.env.command_manager.get_term(self.cfg.wrist_pose_term) + pose_command_b = writ_pose_command.command + pos_command_w, quat_command_w = combine_frame_transforms( + self.robot.data.root_pos_w, + self.robot.data.root_quat_w, + pose_command_b[:, :3], + pose_command_b[:, 3:], + ) + self.vis_articulation.write_root_pose_to_sim(torch.cat([pos_command_w, quat_command_w], dim=1)) + self.vis_articulation.write_joint_state_to_sim( + position=self.current_joint_goal, velocity=torch.zeros(self.current_joint_goal.shape, device=self.device) + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command_cfg.py new file mode 100644 index 0000000..b55bf6f --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/command_cfg.py @@ -0,0 +1,28 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from dataclasses import MISSING + +from isaaclab.managers import CommandTermCfg, SceneEntityCfg +from isaaclab.utils import configclass + +from .command import HandJointCommand + + +@configclass +class HandJointCommandCfg(CommandTermCfg): + """Configuration for uniform pose command generator.""" + + class_type: type = HandJointCommand + + asset_cfg: SceneEntityCfg = MISSING # type: ignore + + articulation_vis_cfg: SceneEntityCfg = MISSING # type: ignore + + predefined_hand_joint_goals: list[list[float]] = MISSING # type: ignore + + wrist_pose_term: str = MISSING # type: ignore diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/rewards.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/rewards.py new file mode 100644 index 0000000..467bfa0 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/mdp/rewards.py @@ -0,0 +1,47 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import math as math_utils + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def stay_still( + env: ManagerBasedRLEnv, + ee_command_name: str, + hand_command_name: str, + hand_asset_cfg: SceneEntityCfg, + ee_asset_cfg: SceneEntityCfg, +) -> torch.Tensor: + asset: Articulation = env.scene[hand_asset_cfg.name] + ee_command = env.command_manager.get_command(ee_command_name) + hand_command = env.command_manager.get_command(hand_command_name) + cur_hand_joint_pos = asset.data.joint_pos[:, hand_asset_cfg.joint_ids] + hand_error = torch.norm(hand_command - cur_hand_joint_pos, dim=1) + des_pos_w, _ = math_utils.combine_frame_transforms( + asset.data.root_state_w[:, :3], asset.data.root_state_w[:, 3:7], ee_command[:, :3] + ) + curr_pos_w = asset.data.body_link_pos_w[:, ee_asset_cfg.body_ids, :3].view(-1, 3) + distance = torch.norm(curr_pos_w - des_pos_w, dim=1) + goal_reached_mask = (distance < 0.1) & (hand_error < 0.4) + + return torch.where(goal_reached_mask, asset.data.body_vel_w.abs().sum(2).sum(1), 0) + + +def delta_action_l2( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg, +) -> torch.Tensor: + asset: Articulation = env.scene[asset_cfg.name] + delta_action = env.action_manager.action - asset.data.joint_pos + return torch.sum(torch.square(delta_action), dim=1) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/track_goal_env.py b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/track_goal_env.py new file mode 100644 index 0000000..d4f7f15 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/manipulation/track_goal/track_goal_env.py @@ -0,0 +1,202 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING + +from uwlab_assets import UWLAB_CLOUD_ASSETS_DIR + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.envs.mdp import time_out +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass + +from . import mdp + + +class SceneCfg(InteractiveSceneCfg): + robot: ArticulationCfg = MISSING # type: ignore + + table = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.4, 0.0, -0.868), rot=(0.707, 0.0, 0.0, -0.707)), + spawn=sim_utils.UsdFileCfg(usd_path=f"{UWLAB_CLOUD_ASSETS_DIR}/Props/Mounts/UWPatVention/pat_vention.usd"), + ) + + # override ground plane + ground = AssetBaseCfg( + prim_path="/World/GroundPlane", + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -0.868)), + spawn=sim_utils.GroundPlaneCfg(), + ) + # lights + dome_light = AssetBaseCfg( + prim_path="/World/Light", spawn=sim_utils.DomeLightCfg(intensity=1500.0, color=(0.75, 0.75, 0.75)) + ) + + +@configclass +class ObservationsCfg: + """""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + joint_pos = ObsTerm(func=mdp.joint_pos_rel) + joint_vel = ObsTerm(func=mdp.joint_vel_rel) + target_object_position = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + pass + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name="REPLACE_ME", + resampling_time_range=(1.5, 2.5), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.3, 0.6), + pos_y=(-0.325, 0.325), + pos_z=(0.125, 0.5), + roll=(0, 0.0), + pitch=(0, 1), + yaw=(0.0, 1), + ), + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.01) + + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.0001, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + end_effector_position_tracking = RewTerm( + func=mdp.link_position_command_align_tanh, + weight=0.5, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + "std": 0.5, + "command_name": "ee_pose", + }, + ) + + end_effector_position_tracking_fine_grained = RewTerm( + func=mdp.link_position_command_align_tanh, + weight=1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + "std": 0.1, + "command_name": "ee_pose", + }, + ) + + end_effector_orientation_tracking = RewTerm( + func=mdp.link_orientation_command_align_tanh, + weight=0.5, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + "std": 0.5, + "command_name": "ee_pose", + }, + ) + + end_effector_orientation_tracking_fine_grained = RewTerm( + func=mdp.link_orientation_command_align_tanh, + weight=1.0, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="REPLACE_ME"), + "std": 0.05, + "command_name": "ee_pose", + }, + ) + + +@configclass +class EventCfg: + reset_everything = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_robot_joint = EventTerm( + func=mdp.reset_joints_by_offset, + params={ + "asset_cfg": SceneEntityCfg("robot"), + "position_range": [-0.2, 0.2], + "velocity_range": [-0.1, 0.1], + }, + mode="reset", + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=time_out, time_out=True) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + pass + + +@configclass +class TrackGoalEnv(ManagerBasedRLEnvCfg): + + # Scene settings + scene: SceneCfg = SceneCfg(num_envs=1, env_spacing=2.5, replicate_physics=False) + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = MISSING # type: ignore + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + self.decimation = 2 + self.episode_length_s = 50 + # simulation settings + self.sim.dt = 0.02 / self.decimation + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 1024 * 1024 * 4 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 + self.sim.physx.gpu_max_rigid_patch_count = 5 * 2**16 + + self.sim.render.enable_ambient_occlusion = True + self.sim.render.enable_reflections = True + self.sim.render.enable_dlssg = True