diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/__init__.py new file mode 100644 index 0000000..bcd1f3c --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Locomotion environments for legged robots.""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/__init__.py new file mode 100644 index 0000000..1eb3dbe --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Advanced Skill Environments. +Reference: + https://github.com/leggedrobotics/legged_gym +""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_base_env.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_base_env.py new file mode 100644 index 0000000..4660c33 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_base_env.py @@ -0,0 +1,315 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from dataclasses import MISSING
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, AssetBaseCfg
+from isaaclab.envs import ViewerCfg
+from isaaclab.managers import CurriculumTermCfg as CurrTerm
+from isaaclab.managers import EventTermCfg as EventTerm
+from isaaclab.managers import ObservationGroupCfg as ObsGroup
+from isaaclab.managers import ObservationTermCfg as ObsTerm
+from isaaclab.managers import RewardTermCfg as RewTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.managers import TerminationTermCfg as DoneTerm
+from isaaclab.sensors import ContactSensorCfg, RayCasterCfg, patterns
+from isaaclab.utils import configclass
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR
+from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
+from uwlab.envs.data_manager_based_rl import DataManagerBasedRLEnvCfg
+from uwlab.scene import InteractiveSceneCfg
+from uwlab.terrains import TerrainGeneratorCfg, TerrainImporterCfg
+
+import uwlab_tasks.manager_based.locomotion.advance_skills.mdp as mdp
+
+
+@configclass
+class SceneCfg(InteractiveSceneCfg):
+    """Configuration for the terrain scene with a legged robot."""
+
+    # ground terrain
+    terrain = TerrainImporterCfg(
+        prim_path="/World/ground",
+        terrain_type="generator",
+        terrain_generator=TerrainGeneratorCfg(
+            size=(10.0, 10.0),
+            border_width=20.0,
+            num_rows=10,
+            num_cols=20,
+            horizontal_scale=0.1,
+            vertical_scale=0.005,
+            slope_threshold=0.75,
+            use_cache=False,
+            sub_terrains={},
+        ),
+        max_init_terrain_level=5,
+        collision_group=-1,
+        physics_material=sim_utils.RigidBodyMaterialCfg(
+            friction_combine_mode="multiply",
+            restitution_combine_mode="multiply",
+            static_friction=1.0,
+            dynamic_friction=1.0,
+        ),
+        visual_material=sim_utils.MdlFileCfg(
+            mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl",
project_uvw=True, + texture_scale=(0.25, 0.25), + ), + debug_vis=False, + ) + + # lights + sky_light = AssetBaseCfg( + prim_path="/World/skyLight", + spawn=sim_utils.DomeLightCfg( + intensity=750.0, + texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr", + ), + ) + + # robots + robot: ArticulationCfg = MISSING # type: ignore + + # sensors + height_scanner = RayCasterCfg( + prim_path="{ENV_REGEX_NS}/Robot/base", + offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 20.0)), + attach_yaw_only=True, + pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=(1.6, 1.0)), + debug_vis=False, + mesh_prim_paths=["/World/ground"], + ) + contact_forces = ContactSensorCfg( + prim_path="{ENV_REGEX_NS}/Robot/.*", history_length=3, track_air_time=True, debug_vis=True + ) + + +@configclass +class ActionsCfg: + """Actions for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True) + + +@configclass +class CommandsCfg: + "Command specifications for the MDP." 
+ goal_point = mdp.TerrainBasedPose2dCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + simple_heading=False, + debug_vis=True, + ranges=mdp.TerrainBasedPose2dCommandCfg.Ranges( + heading=(-3.14, 3.14), + ), + ) + + +@configclass +class ObservationsCfg: + """Observations for the MDP""" + + @configclass + class PolicyCfg(ObsGroup): + base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1)) + base_ang_vel = ObsTerm(func=mdp.base_ang_vel, noise=Unoise(n_min=-0.2, n_max=0.2)) + proj_gravity = ObsTerm(func=mdp.projected_gravity, noise=Unoise(n_min=-0.05, n_max=0.05)) + goal_point_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "goal_point"}) + time_left = ObsTerm(func=mdp.time_left) + joint_pos = ObsTerm(func=mdp.joint_pos, noise=Unoise(n_min=-0.01, n_max=0.01)) + joint_vel = ObsTerm(func=mdp.joint_vel, noise=Unoise(n_min=-1.5, n_max=1.5)) + last_actions = ObsTerm(func=mdp.last_action) + height_scan = ObsTerm( + func=mdp.height_scan, + params={"sensor_cfg": SceneEntityCfg("height_scanner")}, + noise=Unoise(n_min=-0.1, n_max=0.1), + clip=(-1.0, 1.0), + ) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventsCfg: + # startup + physical_material = EventTerm( + func=mdp.randomize_rigid_body_material, # type: ignore + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.8, 0.8), + "dynamic_friction_range": (0.6, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="base"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + # reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + 
"asset_cfg": SceneEntityCfg("robot", body_names="base"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-0.5, 0.5), + "y": (-0.5, 0.5), + "z": (-0.5, 0.5), + "roll": (-0.5, 0.5), + "pitch": (-0.5, 0.5), + "yaw": (-0.5, 0.5), + }, + }, + ) + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + # interval + # comment for pit and gap + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(3.0, 4.5), + params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}}, + ) + + +@configclass +class RewardsCfg: + + # task rewards, eq. 1 + task_reward = RewTerm(func=mdp.task_reward, weight=10.0, params={"reward_window": 1.0}) + heading_reward = RewTerm( + func=mdp.heading_tracking, weight=10.0, params={"distance_threshold": 0.5, "reward_window": 1.0} + ) + + # penalties, eq. 
2 + joint_accel_l2 = RewTerm(func=mdp.joint_acc_l2, weight=-2.5e-7) + joint_torque_l2 = RewTerm(func=mdp.joint_torques_l2, weight=-1.0e-5) + undesired_contact = RewTerm( + func=mdp.undesired_contacts, + weight=-1.0, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), + "threshold": 1.0, + }, + ) + + action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01) + feet_lin_acc_l2 = RewTerm( + func=mdp.feet_lin_acc_l2, weight=-4e-6, params={"robot_cfg": SceneEntityCfg("robot", body_names=".*FOOT")} + ) + feet_rot_acc_l2 = RewTerm( + func=mdp.feet_rot_acc_l2, weight=-2e-7, params={"robot_cfg": SceneEntityCfg("robot", body_names=".*FOOT")} + ) + + illegal_contact_penalty = RewTerm( + func=mdp.illegal_contact_penalty, + weight=-3, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="base"), "threshold": 1.0}, + ) + + # exploration eq. 3 + exploration = RewTerm(func=mdp.exploration_reward, weight=1.0) + + # stalling penalty eq. 4 + stalling = RewTerm(func=mdp.stall_penalty, weight=-1.5, params={"distance_threshold": 0.2}) + + +@configclass +class TerminationsCfg: + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + robot_drop = DoneTerm( + func=mdp.root_height_below_minimum, + params={ + "minimum_height": -20, + }, + ) + + base_contact = DoneTerm( + func=mdp.illegal_contact, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names="base"), + "threshold": 1.0, + }, + ) + + +@configclass +class CurriculumCfg: + terrain_levels = CurrTerm(func=mdp.terrain_levels_vel) # type: ignore + + +@configclass +class AdvanceSkillsBaseEnvCfg(DataManagerBasedRLEnvCfg): + scene: SceneCfg = SceneCfg(num_envs=4096, env_spacing=10) + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventsCfg = EventsCfg() + curriculum: CurriculumCfg = CurriculumCfg() 
+ viewer: ViewerCfg = ViewerCfg(eye=(1.0, 2.0, 2.0), origin_type="asset_body", asset_name="robot", body_name="base") + + def __post_init__(self): + self.decimation = 4 + self.episode_length_s = 6.0 + self.sim.dt = 0.005 + self.sim.render_interval = self.decimation + self.sim.disable_contact_processing = True + self.sim.physics_material = self.scene.terrain.physics_material + self.sim.physx.gpu_total_aggregate_pairs_capacity = 2**24 + self.sim.physx.gpu_found_lost_pairs_capacity = 2**24 + self.sim.physx.gpu_collision_stack_size = 2**27 + self.sim.physx.gpu_max_rigid_patch_count = 5 * 2**16 + + # update sensor update periods + # we tick all the sensors based on the smallest update period (physics update period) + if self.scene.height_scanner is not None: + self.scene.height_scanner.update_period = self.decimation * self.sim.dt + if self.scene.contact_forces is not None: + self.scene.contact_forces.update_period = self.sim.dt + + # check if terrain levels curriculum is enabled - if so, enable curriculum for terrain generator + # this generates terrains with increasing difficulty and is useful for training + if getattr(self.curriculum, "terrain_levels", None) is not None: + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.curriculum = True + else: + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.curriculum = False diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_env.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_env.py new file mode 100644 index 0000000..8480861 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/advance_skills_env.py @@ -0,0 +1,56 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from .advance_skills_base_env import AdvanceSkillsBaseEnvCfg
+from .terrains import EXTREME_STAIR, GAP, IRREGULAR_PILLAR_OBSTACLE, PIT, SLOPE_INV, SQUARE_PILLAR_OBSTACLE
+
+
+class GapEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"gap": GAP}
+
+
+class PitEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"pit": PIT}
+
+
+class ExtremeStairEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"extreme_stair": EXTREME_STAIR}
+
+
+class SlopeInvEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"slope_inv": SLOPE_INV}
+
+
+class SquarePillarObstacleEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"square_pillar_obstacle": SQUARE_PILLAR_OBSTACLE}
+
+
+class IrregularPillarObstacleEnvConfig(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {"irregular_pillar_obstacle": IRREGULAR_PILLAR_OBSTACLE}
+
+
+class AdvanceSkillsEnvCfg(AdvanceSkillsBaseEnvCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.scene.terrain.terrain_generator.sub_terrains = {
+            "gap": GAP,
+            "pit": PIT,
+            "extreme_stair": EXTREME_STAIR,
+            "slope_inv": SLOPE_INV,
+            "square_pillar_obstacle": SQUARE_PILLAR_OBSTACLE,
+            "irregular_pillar_obstacle": IRREGULAR_PILLAR_OBSTACLE,
+        }
diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/__init__.py
new file mode 100644
index 0000000..b33ce65
--- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" Advanced Skill Environments. + +Reference: + https://github.com/leggedrobotics/legged_gym +""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/__init__.py new file mode 100644 index 0000000..3dfa5ae --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/__init__.py @@ -0,0 +1,78 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +gym.register( + id="UW-Position-Advance-Skills-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:AdvanceSkillsSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Pit-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:PitSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Gap-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:GapSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Inv-Slope-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + 
"env_cfg_entry_point": f"{__name__}.spot_env_cfg:SlopeInvSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Extreme-Stair-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:ExtremeStairSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Square-Obstacle-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:SquarePillarObstacleSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Irregular-Obstacle-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:IrregularPillarObstacleSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..bf4684d --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/agents/rsl_rl_cfg.py @@ -0,0 +1,42 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg +from uwlab_rl.rsl_rl import RslRlFancyPpoAlgorithmCfg, SymmetryCfg + +from ..augment import aug_func + + +@configclass +class AdvanceSkillsSpotPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 + max_iterations = 2000 + save_interval = 100 + resume = False + experiment_name = "spot_advance_skills" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlFancyPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + symmetry_cfg=SymmetryCfg(use_data_augmentation=True, use_mirror_loss=False, data_augmentation_func=aug_func), + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/augment.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/augment.py new file mode 100644 index 0000000..365c440 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/augment.py @@ -0,0 +1,101 @@ +# 
Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import torch + + +def aug_observation(obs: torch.Tensor) -> torch.Tensor: + """ + obs: num_steps_per_env * num_envs // num_mini_batches, 313 + ///////////////////////////////////////// + 0: [0:3] (3,) 'base_lin_vel' + 1: [3:6] (3,) 'base_ang_vel' + 2: [6:9] (3,) 'proj_gravity' + 3: [9:13] (4,) 'concatenate_cmd' + 4: [13:14] (1,) 'time_left' + 5: [14:26] (12,) 'joint_pos' + 6: [26:38] (12,) 'joint_vel' + 7: [38:50] (12,) 'last_actions' + 8: [50:] (264,) 'height_scan' + //////////////////////////////////////// + + """ + B = obs.shape[0] + new_obs = obs.repeat(2, 1) + + # 0-2 base lin vel + new_obs[B : 2 * B, 1] = -new_obs[B : 2 * B, 1] + # 3-5 base ang vel + new_obs[B : 2 * B, 3] = -new_obs[B : 2 * B, 3] + new_obs[B : 2 * B, 5] = -new_obs[B : 2 * B, 5] + # 6-8 proj gravity + new_obs[B : 2 * B, 7] = -new_obs[B : 2 * B, 7] + # 9-13 cmd + new_obs[B : 2 * B, 10] = -new_obs[B : 2 * B, 10] + new_obs[B : 2 * B, 12] = -new_obs[B : 2 * B, 12] + + # X-symmetry: + # 00 = 'fl_hx' , 01 = fr_hx (-1) + # 01 = 'fr_hx' , 00 = fl_hx (-1) + # 02 = 'hl_hx' , 03 = hr_hx (-1) + # 03 = 'hr_hx' , 02 = hl_hx (-1) + # 04 = 'fl_hy' , 05 = fr_hy + # 05 = 'fr_hy' , 04 = fl_hy + # 06 = 'hl_hy' , 07 = hr_hy + # 07 = 'hr_hy' , 06 = hl_hy + # 08 = 'fl_kn' , 09 = fr_kn + # 09 = 'fr_kn' , 08 = fl_kn + # 10 = 'hl_kn' , 11 = hr_kn + # 11 = 'hr_kn' , 10 = hl_kn + + new_idx = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10] + dir_change = torch.tensor([-1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1], device=obs.device) + new_obs[B : 2 * B, 14:26] = new_obs[B : 2 * B, 14:26][:, new_idx] * dir_change + new_obs[B : 2 * B, 26:38] = new_obs[B : 2 * B, 26:38][:, new_idx] * dir_change + new_obs[B : 2 * B, 38:50] = new_obs[B : 2 * B, 38:50][:, new_idx] * dir_change + new_obs[B : 2 * B, 50:] = new_obs[B : 2 * B, 50:].reshape(B, 11, 24).flip(1).flatten(1, 2) + + return new_obs + + +def aug_actions( + 
actions: torch.Tensor, actions_log_prob: torch.Tensor, action_mean: torch.Tensor, action_sigma: torch.Tensor
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    new_actions = actions.repeat(2, 1)
+    new_actions_log_prob = actions_log_prob.repeat(2, 1)
+    new_action_mean = action_mean.repeat(2, 1)
+    new_action_sigma = action_sigma.repeat(2, 1)
+    B = actions.shape[0]
+
+    new_idx = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]
+    dir_change = torch.tensor([-1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1], device=actions.device)
+    new_actions[B : 2 * B, :] = new_actions[B : 2 * B, new_idx] * dir_change
+    new_action_mean[B : 2 * B, :] = new_action_mean[B : 2 * B, new_idx] * dir_change
+    new_action_sigma[B : 2 * B, :] = new_action_sigma[B : 2 * B, new_idx]
+
+    return new_actions, new_actions_log_prob, new_action_mean, new_action_sigma
+
+
+def aug_action(actions: torch.Tensor) -> torch.Tensor:
+    new_actions = actions.repeat(2, 1)
+    B = actions.shape[0]
+
+    new_idx = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]
+    dir_change = torch.tensor([-1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1], device=actions.device)
+    new_actions[B : 2 * B, :] = new_actions[B : 2 * B, new_idx] * dir_change
+
+    return new_actions
+
+
+def aug_func(obs=None, actions=None, env=None, is_critic=False):
+    aug_obs = None
+    aug_act = None
+    if obs is not None:
+        # Actor and critic observations share the same left/right symmetry, so a
+        # single augmentation call covers both (is_critic kept for API compatibility).
+        aug_obs = aug_observation(obs)
+    if actions is not None:
+        aug_act = aug_action(actions)
+    return aug_obs, aug_act
diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/__init__.py
new file mode 100644
index 0000000..0e0c812
--- /dev/null
+++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2024-2025, The UW Lab Project Developers.
+# All Rights Reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause + +from .rewards import * diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/rewards.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/rewards.py new file mode 100644 index 0000000..9d317f6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/mdp/rewards.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the reward functions that can be used for Spot's locomotion task. + +The functions can be passed to the :class:`isaaclab.managers.RewardTermCfg` object to +specify the reward function and its parameters. +""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import ManagerTermBase, SceneEntityCfg +from isaaclab.sensors import ContactSensor + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + from isaaclab.managers import RewardTermCfg + + +def reward_forward_velocity( + env: ManagerBasedRLEnv, + std: float, + max_iter: int = 150, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> torch.Tensor: + """Reward tracking of linear velocity commands (xy axes) using exponential kernel.""" + # extract the used quantities (to enable type-hinting) + asset: RigidObject = env.scene[asset_cfg.name] + root_lin_vel_b = asset.data.root_lin_vel_b + forward_velocity = root_lin_vel_b[:, 0] + current_iter = int(env.common_step_counter / 48) + distance = torch.norm(env.command_manager.get_command("goal_point")[:, :2], dim=1) + return torch.where(distance > 0.4, torch.tanh(forward_velocity.clamp(-1, 1) / std) * (current_iter < max_iter), 0) + + +def air_time_reward( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg, + sensor_cfg: 
SceneEntityCfg, + mode_time: float, + velocity_threshold: float, +) -> torch.Tensor: + """Reward longer feet air and contact time.""" + # extract the used quantities (to enable type-hinting) + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + asset: Articulation = env.scene[asset_cfg.name] + if contact_sensor.cfg.track_air_time is False: + raise RuntimeError("Activate ContactSensor's track_air_time!") + # compute the reward + current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids] + current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids] + + t_max = torch.max(current_air_time, current_contact_time) + t_min = torch.clip(t_max, max=mode_time) + stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time) + distance = torch.norm(env.command_manager.get_command("goal_point")[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 4) + body_vel = torch.linalg.norm(asset.data.root_com_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 4) + reward = torch.where( + torch.logical_or(distance > 0.4, body_vel > velocity_threshold), + torch.where(t_max < mode_time, t_min, 0), + stance_cmd_reward, + ) + return torch.sum(reward, dim=1) + + +class GaitReward(ManagerTermBase): + """Gait enforcing reward term for quadrupeds. + + This reward penalizes contact timing differences between selected foot pairs defined in :attr:`synced_feet_pair_names` + to bias the policy towards a desired gait, i.e trotting, bounding, or pacing. Note that this reward is only for + quadrupedal gaits with two pairs of synchronized feet. + """ + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + """Initialize the term. + + Args: + cfg: The configuration of the reward. + env: The RL environment instance. 
+ """ + super().__init__(cfg, env) + self.std: float = cfg.params["std"] + self.max_err: float = cfg.params["max_err"] + self.velocity_threshold: float = cfg.params["velocity_threshold"] + self.contact_sensor: ContactSensor = env.scene.sensors[cfg.params["sensor_cfg"].name] + self.asset: Articulation = env.scene[cfg.params["asset_cfg"].name] + # match foot body names with corresponding foot body ids + synced_feet_pair_names = cfg.params["synced_feet_pair_names"] + if ( + len(synced_feet_pair_names) != 2 + or len(synced_feet_pair_names[0]) != 2 + or len(synced_feet_pair_names[1]) != 2 + ): + raise ValueError("This reward only supports gaits with two pairs of synchronized feet, like trotting.") + synced_feet_pair_0 = self.contact_sensor.find_bodies(synced_feet_pair_names[0])[0] + synced_feet_pair_1 = self.contact_sensor.find_bodies(synced_feet_pair_names[1])[0] + self.synced_feet_pairs = [synced_feet_pair_0, synced_feet_pair_1] + + def __call__( + self, + env: ManagerBasedRLEnv, + std: float, + max_err: float, + velocity_threshold: float, + synced_feet_pair_names, + asset_cfg: SceneEntityCfg, + sensor_cfg: SceneEntityCfg, + max_iterations: int = 500, + ) -> torch.Tensor: + """Compute the reward. + + This reward is defined as a multiplication between six terms where two of them enforce pair feet + being in sync and the other four rewards if all the other remaining pairs are out of sync + + Args: + env: The RL environment instance. + Returns: + The reward value. 
+ """ + current_iter = int(env.common_step_counter / 48) + if current_iter > max_iterations: + return torch.zeros(self.num_envs, device=self.device) + # for synchronous feet, the contact (air) times of two feet should match + sync_reward_0 = self._sync_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[0][1]) + sync_reward_1 = self._sync_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[1][1]) + sync_reward = sync_reward_0 * sync_reward_1 + # for asynchronous feet, the contact time of one foot should match the air time of the other one + async_reward_0 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][0]) + async_reward_1 = self._async_reward_func(self.synced_feet_pairs[0][1], self.synced_feet_pairs[1][1]) + async_reward_2 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][1]) + async_reward_3 = self._async_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[0][1]) + async_reward = async_reward_0 * async_reward_1 * async_reward_2 * async_reward_3 + # only enforce gait if cmd > 0 + distance = torch.norm(env.command_manager.get_command("goal_point")[:, :2], dim=1) + body_vel = torch.linalg.norm(self.asset.data.root_com_lin_vel_b[:, :2], dim=1) + return torch.where( + torch.logical_or(distance > 0.4, body_vel > self.velocity_threshold), sync_reward * async_reward, 0.0 + ) + + """ + Helper functions. + """ + + def _sync_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor: + """Reward synchronization of two feet.""" + air_time = self.contact_sensor.data.current_air_time + contact_time = self.contact_sensor.data.current_contact_time + # penalize the difference between the most recent air time and contact time of synced feet pairs. 
+ se_air = torch.clip(torch.square(air_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2) + se_contact = torch.clip(torch.square(contact_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2) + return torch.exp(-(se_air + se_contact) / self.std) + + def _async_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor: + """Reward anti-synchronization of two feet.""" + air_time = self.contact_sensor.data.current_air_time + contact_time = self.contact_sensor.data.current_contact_time + # penalize the difference between opposing contact modes air time of feet 1 to contact time of feet 2 + # and contact time of feet 1 to air time of feet 2) of feet pairs that are not in sync with each other. + se_act_0 = torch.clip(torch.square(air_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2) + se_act_1 = torch.clip(torch.square(contact_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2) + return torch.exp(-(se_act_0 + se_act_1) / self.std) + + +def air_time_variance_penalty(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor: + """Penalize variance in the amount of time each foot spends in the air/on the ground relative to each other""" + # extract the used quantities (to enable type-hinting) + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + if contact_sensor.cfg.track_air_time is False: + raise RuntimeError("Activate ContactSensor's track_air_time!") + # compute the reward + last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids] + last_contact_time = contact_sensor.data.last_contact_time[:, sensor_cfg.body_ids] + return torch.var(torch.clip(last_air_time, max=0.5), dim=1) + torch.var( + torch.clip(last_contact_time, max=0.5), dim=1 + ) + + +def foot_slip_penalty( + env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, sensor_cfg: SceneEntityCfg, threshold: float +) -> torch.Tensor: + """Penalize foot planar (xy) slip when in contact with the ground""" + asset: RigidObject = 
env.scene[asset_cfg.name] + # extract the used quantities (to enable type-hinting) + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + + # check if contact force is above threshold + net_contact_forces = contact_sensor.data.net_forces_w_history + is_contact = torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] > threshold + foot_planar_velocity = torch.linalg.norm(asset.data.body_com_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2) + + reward = is_contact * foot_planar_velocity + return torch.sum(reward, dim=1) + + +def joint_position_penalty( + env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float, velocity_threshold: float +) -> torch.Tensor: + """Penalize joint position error from default on the articulation.""" + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + distance = torch.norm(env.command_manager.get_command("goal_point")[:, :2], dim=1) + body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1) + reward = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1) + return torch.where((distance > 0.4) | (body_vel > velocity_threshold), reward, stand_still_scale * reward) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/spot_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/spot_env_cfg.py new file mode 100644 index 0000000..8baa83b --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot/spot_env_cfg.py @@ -0,0 +1,143 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.spot as spot + +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +import uwlab_tasks.manager_based.locomotion.advance_skills.config.spot.mdp as spot_mdp + +from ... import advance_skills_base_env, advance_skills_env + + +@configclass +class SpotActionsCfg: + actions = spot.SPOT_JOINT_POSITION + + +@configclass +class SportRewardsCfg(advance_skills_base_env.RewardsCfg): + move_forward = RewTerm( + func=spot_mdp.reward_forward_velocity, + weight=0.3, + params={ + "std": 1, + "max_iter": 200, + }, + ) + + air_time = RewTerm( + func=spot_mdp.air_time_reward, + weight=1.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + + gait = RewTerm( + func=spot_mdp.GaitReward, + weight=2.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + "max_iterations": 400.0, + }, + ) + + # -- penalties + air_time_variance = RewTerm( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + + foot_slip = RewTerm( + func=spot_mdp.foot_slip_penalty, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + "threshold": 1.0, + }, + ) + + joint_pos = RewTerm( + func=spot_mdp.joint_position_penalty, + weight=-0.4, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stand_still_scale": 5.0, + "velocity_threshold": 0.5, + }, + ) + + +@configclass +class SpotEnvMixin: + actions: SpotActionsCfg = 
SpotActionsCfg() + rewards: SportRewardsCfg = SportRewardsCfg() + + def __post_init__(self: advance_skills_base_env.AdvanceSkillsBaseEnvCfg): + # Ensure parent classes run their setup first + super().__post_init__() + # overwrite as spot's body names for sensors + self.scene.robot = spot.SPOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/body" + self.scene.height_scanner.pattern_cfg.resolution = 0.15 + self.scene.height_scanner.pattern_cfg.size = (3.5, 1.5) + + # overwrite as spot's body names for events + self.events.add_base_mass.params["asset_cfg"].body_names = "body" + self.events.base_external_force_torque.params["asset_cfg"].body_names = "body" + + self.rewards.undesired_contact.params["sensor_cfg"].body_names = ["body", ".*leg"] + self.rewards.feet_lin_acc_l2.params["robot_cfg"].body_names = ".*_foot" + self.rewards.feet_rot_acc_l2.params["robot_cfg"].body_names = ".*_foot" + self.rewards.illegal_contact_penalty.params["sensor_cfg"].body_names = "body" + + self.terminations.base_contact.params["sensor_cfg"].body_names = "body" + self.viewer.body_name = "body" + + self.sim.dt = 0.002 + self.decimation = 10 + + +@configclass +class AdvanceSkillsSpotEnvCfg(SpotEnvMixin, advance_skills_env.AdvanceSkillsEnvCfg): + pass + + +@configclass +class PitSpotEnvCfg(SpotEnvMixin, advance_skills_env.PitEnvConfig): + pass + + +@configclass +class GapSpotEnvCfg(SpotEnvMixin, advance_skills_env.GapEnvConfig): + pass + + +@configclass +class SlopeInvSpotEnvCfg(SpotEnvMixin, advance_skills_env.SlopeInvEnvConfig): + pass + + +@configclass +class ExtremeStairSpotEnvCfg(SpotEnvMixin, advance_skills_env.ExtremeStairEnvConfig): + pass + + +@configclass +class SquarePillarObstacleSpotEnvCfg(SpotEnvMixin, advance_skills_env.SquarePillarObstacleEnvConfig): + pass diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/__init__.py 
b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/__init__.py new file mode 100644 index 0000000..661ebb9 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/__init__.py @@ -0,0 +1,78 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +gym.register( + id="UW-Position-Advance-Skills-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:AdvanceSkillsSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Pit-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:PitSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Gap-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:GapSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Inv-Slope-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:SlopeInvSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Extreme-Stair-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:ExtremeStairSpotEnvCfg", + "rsl_rl_cfg_entry_point": 
f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Square-Obstacle-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:SquarePillarObstacleSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Position-Irregular-Obstacle-Arm-Spot-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.spot_env_cfg:IrregularPillarObstacleSpotEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AdvanceSkillsSpotPPORunnerCfg", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..cb18186 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/agents/rsl_rl_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class AdvanceSkillsSpotPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 48 + max_iterations = 2000 + save_interval = 100 + resume = False + experiment_name = "spot_advance_skills" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/spot_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/spot_env_cfg.py new file mode 100644 index 0000000..c587b0f --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/config/spot_with_arm/spot_env_cfg.py @@ -0,0 +1,144 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import uwlab_assets.robots.spot as spot + +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +import uwlab_tasks.manager_based.locomotion.advance_skills.config.spot.mdp as spot_mdp + +from ... import advance_skills_base_env, advance_skills_env +from ... 
import mdp as mdp + + +@configclass +class SpotActionsCfg: + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=["^(?!.*arm0).*$"], scale=0.2, use_default_offset=True + ) + arm_pos = mdp.DefaultJointPositionStaticActionCfg( + asset_name="robot", joint_names=["arm0.*"], scale=1, use_default_offset=True + ) + + +@configclass +class SportRewardsCfg(advance_skills_base_env.RewardsCfg): + move_forward = RewTerm( + func=spot_mdp.reward_forward_velocity, + weight=0.3, + params={ + "std": 1, + "max_iter": 200, + }, + ) + + air_time = RewTerm( + func=spot_mdp.air_time_reward, + weight=1.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + + gait = RewTerm( + func=spot_mdp.GaitReward, + weight=2.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + "max_iterations": 400, + }, + ) + + # -- penalties + air_time_variance = RewTerm( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + + foot_slip = RewTerm( + func=spot_mdp.foot_slip_penalty, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + "threshold": 1.0, + }, + ) + + +@configclass +class SpotEnvMixin: + actions: SpotActionsCfg = SpotActionsCfg() + rewards: SportRewardsCfg = SportRewardsCfg() + + def __post_init__(self: advance_skills_base_env.AdvanceSkillsBaseEnvCfg): + # Ensure parent classes run their setup first + super().__post_init__() + # overwrite as spot's body names for sensors + self.scene.robot = spot.SPOT_WITH_ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + 
self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/body" + self.scene.height_scanner.pattern_cfg.resolution = 0.15 + self.scene.height_scanner.pattern_cfg.size = (3.5, 1.5) + + # overwrite as spot's body names for events + self.events.add_base_mass.params["asset_cfg"].body_names = "body" + self.events.base_external_force_torque.params["asset_cfg"].body_names = "body" + + self.rewards.undesired_contact.params["sensor_cfg"].body_names = ["body", ".*leg"] + self.rewards.feet_lin_acc_l2.params["robot_cfg"].body_names = ".*_foot" + self.rewards.feet_rot_acc_l2.params["robot_cfg"].body_names = ".*_foot" + self.rewards.illegal_contact_penalty.params["sensor_cfg"].body_names = "body" + + self.terminations.base_contact.params["sensor_cfg"].body_names = "body" + self.viewer.body_name = "body" + + self.sim.dt = 0.002 + self.decimation = 10 + + +@configclass +class AdvanceSkillsSpotEnvCfg(SpotEnvMixin, advance_skills_env.AdvanceSkillsEnvCfg): + pass + + +@configclass +class PitSpotEnvCfg(SpotEnvMixin, advance_skills_env.PitEnvConfig): + pass + + +@configclass +class GapSpotEnvCfg(SpotEnvMixin, advance_skills_env.GapEnvConfig): + pass + + +@configclass +class SlopeInvSpotEnvCfg(SpotEnvMixin, advance_skills_env.SlopeInvEnvConfig): + pass + + +@configclass +class ExtremeStairSpotEnvCfg(SpotEnvMixin, advance_skills_env.ExtremeStairEnvConfig): + pass + + +@configclass +class SquarePillarObstacleSpotEnvCfg(SpotEnvMixin, advance_skills_env.SquarePillarObstacleEnvConfig): + pass + + +@configclass +class IrregularPillarObstacleSpotEnvCfg(SpotEnvMixin, advance_skills_env.IrregularPillarObstacleEnvConfig): + pass diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/__init__.py new file mode 100644 index 0000000..25e0278 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 
(c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the locomotion environments.""" + +from isaaclab.envs.mdp import * +from uwlab.envs.mdp import * # noqa: F401, F403 + +from .curriculums import * # noqa: F401, F403 +from .observations import * # noqa: F401, F403 +from .rewards import * # noqa: F401, F403 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/curriculums.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/curriculums.py new file mode 100644 index 0000000..bcd807f --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/curriculums.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from collections.abc import Sequence +from typing import TYPE_CHECKING + +from isaaclab.terrains import TerrainImporter + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def terrain_levels_vel( + env: ManagerBasedRLEnv, + env_ids: Sequence[int], + demotion_fraction: float = 0.05, +) -> torch.Tensor: + """Curriculum based on the distance the robot walked when commanded to reach a desired location. + + This term is used to increase the difficulty of the terrain when the robot walks far enough and decrease the + difficulty when the robot walks less than specified distance required by the position command. + + .. note:: + It is only possible to use this term with the terrain type ``generator``. For further information + on different terrain types, check the :class:`isaaclab.terrains.TerrainImporter` class. + + Returns: + The mean terrain level for the given environment ids. 
+ """ + # extract the used quantities (to enable type-hinting) + terrain: TerrainImporter = env.scene.terrain + command = env.command_manager.get_command("goal_point") + # compute the distance the robot walked + distance = command[env_ids, :2].norm(2, dim=1) + # robots that walked far enough progress to harder terrains + move_up = distance < 0.5 + # robots that walked less than half of their required distance go to simpler terrains + total_distance = ( + env.command_manager.get_term("goal_point").pos_command_w[env_ids, :2] - env.scene.env_origins[env_ids, :2] + ).norm(2, dim=1) + distance_traveled = 1 - distance / total_distance + move_down = distance_traveled < demotion_fraction + # update terrain levels + terrain.update_env_origins(env_ids, move_up, move_down) + # return the mean terrain level + return torch.mean(terrain.terrain_levels.float()) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/observations.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/observations.py new file mode 100644 index 0000000..229b728 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/observations.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import torch + +from isaaclab.envs import ManagerBasedRLEnv + + +def time_left(env: ManagerBasedRLEnv) -> torch.Tensor: + if hasattr(env, "episode_length_buf"): + life_left = (1 - env.episode_length_buf.float() / env.max_episode_length) * env.max_episode_length_s + else: + life_left = torch.ones(env.num_envs, device=env.device, dtype=torch.float) * env.max_episode_length_s + return life_left.view(-1, 1) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/rewards.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/rewards.py new file mode 100644 index 0000000..0c66220 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/mdp/rewards.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# Reference: [Advanced Skills by Learning Locomotion and Local Navigation End-to-End, Nikita Rudin(s), +# https://arxiv.org/pdf/2209.12827] + +import torch + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.sensors import ContactSensor +from uwlab.envs import DataManagerBasedRLEnv + + +def task_reward( + env: DataManagerBasedRLEnv, reward_window: float = 1.0 # Represents Tr, the length of the reward window +): + # + # See section II.B (page 3) Exploration Reward for details. 
+ # Calculate the time step at which the reward window starts + reward_start_step = env.max_episode_length * (1 - reward_window / env.max_episode_length_s) + + # Calculate the distance to the goal (‖xb − x∗b‖^2), squared L2 norm of the difference + distance_to_goal = env.command_manager.get_command("goal_point")[:, :2].norm(2, -1).pow(2) + + # Calculate task reward as per the equation: + # If within the reward window, r_task is non-zero + task_reward = (1 / (1 + distance_to_goal)) * (env.episode_length_buf > reward_start_step).float() + residue_task_reward = (1 / reward_window) * task_reward + + # TODO: Try not to change exploration weight here. + # "The following line removes the exploration reward (r_bias) once the task reward (r_task) + # reaches 50% of its maximum value, as described in the paper." [II.B (page 3)] + if task_reward.mean() > 0.5 and (env.reward_manager.get_term_cfg("exploration").weight > 0.0): + env.reward_manager.get_term_cfg("exploration").weight = 0.0 + + return residue_task_reward + + + def heading_tracking(env: DataManagerBasedRLEnv, distance_threshold: float = 2.0, reward_window: float = 2.0): + desired_heading = env.command_manager.get_command("goal_point")[:, 3] + reward_start_step = env.max_episode_length * (1 - reward_window / env.max_episode_length_s) + current_dist = env.command_manager.get_command("goal_point")[:, :2].norm(2, -1) + r_heading_tracking = ( + 1 + / reward_window + * (1 / (1 + desired_heading.pow(2))) + * (current_dist < distance_threshold).float() + * (env.episode_length_buf > reward_start_step).float() + ) + return r_heading_tracking + + + def exploration_reward(env: DataManagerBasedRLEnv, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")): + # Retrieve the robot and target data + robot: Articulation = env.scene[robot_cfg.name] + base_velocity = robot.data.root_lin_vel_b # Robot's current base velocity vector + target_position = env.command_manager.get_command("goal_point")[ + :, :2 + ] # Target position vector 
relative to robot base + + # Compute the dot product of the robot's base velocity and target position vectors + velocity_alignment = (base_velocity[:, :2] * target_position).sum(-1) + + # Calculate the norms (magnitudes) of the velocity and target position vectors + velocity_magnitude = torch.norm(base_velocity, p=2, dim=-1) + target_magnitude = torch.norm(target_position, p=2, dim=-1) + + # Normalize the dot product into a cosine-like alignment score. + # NOTE(review): velocity_alignment uses only the planar (xy) velocity but velocity_magnitude uses the full 3D + # velocity, so this is not a true planar cosine similarity (vertical motion dilutes the reward) — confirm this + # is intended, otherwise use base_velocity[:, :2] in the norm. Small epsilon prevents division by zero. + exploration_reward = velocity_alignment / (velocity_magnitude * target_magnitude + 1e-6) + return exploration_reward + + + def stall_penalty( + env: DataManagerBasedRLEnv, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + base_vel_threshold: float = 0.1, + distance_threshold: float = 0.5, + ): + robot: Articulation = env.scene[robot_cfg.name] + base_vel = robot.data.root_lin_vel_b.norm(2, dim=-1) + distance_to_goal = env.command_manager.get_command("goal_point")[:, :2].norm(2, dim=-1) + return (base_vel < base_vel_threshold) & (distance_to_goal > distance_threshold) + + + def illegal_contact_penalty(env: DataManagerBasedRLEnv, threshold: float, sensor_cfg: SceneEntityCfg): + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] # type: ignore + net_contact_forces = contact_sensor.data.net_forces_w_history + # check if any contact force exceeds the threshold + return torch.any( + torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] > threshold, + dim=1, # type: ignore + ).float() + + + def feet_lin_acc_l2(env: DataManagerBasedRLEnv, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")): + robot: Articulation = env.scene[robot_cfg.name] + feet_acc = torch.sum(torch.square(robot.data.body_lin_acc_w[..., robot_cfg.body_ids, :]), dim=(1, 2)) + return feet_acc + + + def feet_rot_acc_l2(env: DataManagerBasedRLEnv, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")): + robot: 
Articulation = env.scene[robot_cfg.name] + feet_acc = torch.sum(torch.square(robot.data.body_ang_acc_w[..., robot_cfg.body_ids, :]), dim=(1, 2)) + return feet_acc + + +def stand_penalty( + env: DataManagerBasedRLEnv, + height_threshold: float, + robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> torch.Tensor: + robot: Articulation = env.scene[robot_cfg.name] + base_height = robot.data.root_link_pos_w[:, 2] # z-coordinate of the base + penalty = (base_height < height_threshold).float() * -1.0 + return penalty diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/__init__.py new file mode 100644 index 0000000..edd1266 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from .terrain_cfg import * diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/terrain_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/terrain_cfg.py new file mode 100644 index 0000000..b80a951 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/advance_skills/terrains/terrain_cfg.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from uwlab import terrains as terrain_cfg +from uwlab.terrains.utils import FlatPatchSamplingByRadiusCfg + +GAP = terrain_cfg.MeshGapTerrainCfg( + platform_width=3.0, + gap_width_range=(0.05, 1.5), + proportion=0.2, + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + radius_range=(2.0, 4.0), + max_height_diff=0.2, + ) + }, +) + +PIT = terrain_cfg.MeshPitTerrainCfg( + platform_width=3.0, + pit_depth_range=(0.05, 1.2), + proportion=0.2, + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + max_height_diff=0.2, + radius_range=(2.0, 4.0), + ) + }, +) + +SQUARE_PILLAR_OBSTACLE = terrain_cfg.HfDiscreteObstaclesTerrainCfg( + num_obstacles=8, + obstacle_height_mode="fixed", + obstacle_height_range=(2.0, 3.0), + obstacle_width_range=(0.5, 1.5), + proportion=0.2, + platform_width=2, + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + max_height_diff=0.2, + radius_range=(4.0, 6.0), + ) + }, +) + +IRREGULAR_PILLAR_OBSTACLE = terrain_cfg.MeshRepeatedBoxesTerrainCfg( + platform_width=2, + max_height_noise=0.5, + proportion=0.2, + object_params_start=terrain_cfg.MeshRepeatedBoxesTerrainCfg.ObjectCfg( + num_objects=5, height=4.0, size=(0.5, 0.5), max_yx_angle=0.0, degrees=True + ), + object_params_end=terrain_cfg.MeshRepeatedBoxesTerrainCfg.ObjectCfg( + num_objects=10, height=6.0, size=(1.0, 1.0), max_yx_angle=0.0, degrees=True + ), + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + max_height_diff=0.2, + radius_range=(4.0, 6.0), + ) + }, +) + +SLOPE_INV = terrain_cfg.HfInvertedPyramidSlopedTerrainCfg( + proportion=0.2, + slope_range=(0.0, 0.9), + platform_width=2.0, + border_width=1.5, + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + max_height_diff=0.2, + radius_range=(3.0, 4.5), + ) + }, 
+) + +EXTREME_STAIR = terrain_cfg.HfPyramidStairsTerrainCfg( + platform_width=3.0, + step_height_range=(0.05, 0.2), + step_width=0.3, + proportion=0.2, + inverted=True, + border_width=1.0, + patch_sampling={ + "target": FlatPatchSamplingByRadiusCfg( + num_patches=10, + patch_radius=0.5, + max_height_diff=0.2, + radius_range=(4.0, 4.5), + ) + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/__init__.py new file mode 100644 index 0000000..b784a89 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Locomotion environments with velocity-tracking commands. + +These environments are based on the `legged_gym` environments provided by Rudin et al. + +Reference: + https://github.com/leggedrobotics/legged_gym +""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/__init__.py new file mode 100644 index 0000000..d3ddfa9 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for velocity-based locomotion environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. 
diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py new file mode 100644 index 0000000..1cecaa5 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents, flat_env_cfg, rough_env_cfg + +## +# Register Gym environments. +## + +gym.register( + id="UW-Velocity-Flat-Anymal-C-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg, + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AnymalCRoughPPORunnerFlatCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="UW-Velocity-Flat-Anymal-C-Play-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg_PLAY, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AnymalCRoughPPORunnerFlatCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + + +gym.register( + id="UW-Velocity-Rough-Anymal-C-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AnymalCRoughPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + }, +) + +gym.register( + 
id="UW-Velocity-Rough-Anymal-C-Play-v0", + entry_point="uwlab.envs:DataManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg_PLAY, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:AnymalCRoughPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_flat_ppo_cfg.yaml b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_flat_ppo_cfg.yaml new file mode 100644 index 0000000..c472ce6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_flat_ppo_cfg.yaml @@ -0,0 +1,76 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [128, 128, 128] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # 
path to the checkpoint to load + + config: + name: anymal_c_flat + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: True + normalize_input: False + normalize_value: True + value_bootstrap: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.6 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-3 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 20000 + max_epochs: 300 + save_best_after: 100 + save_frequency: 50 + grad_norm: 1.0 + entropy_coef: 0.005 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 5 + critic_coef: 2.0 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_rough_ppo_cfg.yaml b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_rough_ppo_cfg.yaml new file mode 100644 index 0000000..042799d --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rl_games_rough_ppo_cfg.yaml @@ -0,0 +1,76 @@ +params: + seed: 42 + + # environment wrapper clipping + env: + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [512, 256, 128] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: anymal_c_rough + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + 
mixed_precision: True + normalize_input: False + normalize_value: True + value_bootstrap: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.6 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-3 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 20000 + max_epochs: 1500 + save_best_after: 100 + save_frequency: 50 + grad_norm: 1.0 + entropy_coef: 0.005 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 5 + critic_coef: 2.0 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_cfg.py new file mode 100644 index 0000000..d677fa7 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_cfg.py @@ -0,0 +1,157 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 1500 + save_interval = 50 + resume = False + experiment_name = "anymal_c_rough" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +@configclass +class AnymalCRoughPositionPPORunnerCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name = "anymal_c_rough_position" + + +@configclass +class AnymalCRoughPPORunnerPrmdIprmdBoxRghHfslpCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name = "PrmdIprmdBoxRghHfslp" + + +@configclass +class AnymalCRoughPPORunnerPrmdIprmdBoxRghCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name = "PrmdIprmdBoxRgh" + + +@configclass +class AnymalCRoughPPORunnerPrmdIprmdBoxHfslpCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name = "PrmdIprmdBoxHfslp" + + +@configclass +class AnymalCRoughPPORunnerPrmdIprmdRghHfslpCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name = "PrmdIprmdRghHfslp" + + +@configclass +class AnymalCRoughPPORunnerPrmdBoxRghHfslpCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + self.experiment_name 
= "PrmdBoxRghHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerIprmdBoxRghHfslpCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "IprmdBoxRghHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerBoxRghHfslpCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "BoxRghHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerPrmdRghHfslpCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "PrmdRghHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerPrmdIprmdHfslpCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "PrmdIprmdHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerPrmdIprmdRghCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "PrmdIprmdRgh"
+
+
+@configclass
+class AnymalCRoughPPORunnerPrmdIprmdCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "PrmdIprmd"
+
+
+@configclass
+class AnymalCRoughPPORunnerIprmdBoxCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "IprmdBox"
+
+
+@configclass
+class AnymalCRoughPPORunnerBoxRghCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "BoxRgh"
+
+
+@configclass
+class AnymalCRoughPPORunnerRghHfslpCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "RghHfslp"
+
+
+@configclass
+class AnymalCRoughPPORunnerFlatCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "Flat"
+
+
+@configclass
+class AnymalCRoughPPORunnerBoxCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.experiment_name = "Box"
diff --git 
a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml new file mode 100644 index 0000000..a642556 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml @@ -0,0 +1,67 @@ +seed: 42 + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see skrl.utils.model_instantiators.torch.gaussian_model for parameter details + clip_actions: True + clip_log_std: True + initial_log_std: 0 + min_log_std: -20.0 + max_log_std: 2.0 + input_shape: "Shape.STATES" + hiddens: [128, 128, 128] + hidden_activation: ["elu", "elu"] + output_shape: "Shape.ACTIONS" + output_activation: "" + output_scale: 1.0 + value: # see skrl.utils.model_instantiators.torch.deterministic_model for parameter details + clip_actions: False + input_shape: "Shape.STATES" + hiddens: [128, 128, 128] + hidden_activation: ["elu", "elu"] + output_shape: "Shape.ONE" + output_activation: "" + output_scale: 1.0 + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + rollouts: 24 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.e-3 + learning_rate_scheduler: "KLAdaptiveLR" + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: "RunningStandardScaler" + state_preprocessor_kwargs: null + value_preprocessor: "RunningStandardScaler" + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.005 + value_loss_scale: 1.0 + kl_threshold: 0 + rewards_shaper_scale: 1.0 + # logging 
and checkpoint + experiment: + directory: "anymal_c_flat" + experiment_name: "" + write_interval: 36 + checkpoint_interval: 360 + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + timesteps: 7200 + environment_info: "log" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml new file mode 100644 index 0000000..1e33f06 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml @@ -0,0 +1,67 @@ +seed: 42 + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see skrl.utils.model_instantiators.torch.gaussian_model for parameter details + clip_actions: True + clip_log_std: True + initial_log_std: 0 + min_log_std: -20.0 + max_log_std: 2.0 + input_shape: "Shape.STATES" + hiddens: [512, 256, 128] + hidden_activation: ["elu", "elu"] + output_shape: "Shape.ACTIONS" + output_activation: "" + output_scale: 1.0 + value: # see skrl.utils.model_instantiators.torch.deterministic_model for parameter details + clip_actions: False + input_shape: "Shape.STATES" + hiddens: [512, 256, 128] + hidden_activation: ["elu", "elu"] + output_shape: "Shape.ONE" + output_activation: "" + output_scale: 1.0 + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + rollouts: 24 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.e-3 + learning_rate_scheduler: "KLAdaptiveLR" + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: "RunningStandardScaler" + state_preprocessor_kwargs: null + value_preprocessor: 
"RunningStandardScaler" + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.005 + value_loss_scale: 1.0 + kl_threshold: 0 + rewards_shaper_scale: 1.0 + # logging and checkpoint + experiment: + directory: "anymal_c_rough" + experiment_name: "" + write_interval: 180 + checkpoint_interval: 1800 + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + timesteps: 36000 + environment_info: "log" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py new file mode 100644 index 0000000..acce623 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from .rough_env_cfg import AnymalCRoughEnvCfg
+
+
+@configclass
+class AnymalCFlatEnvCfg(AnymalCRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # override rewards
+        self.rewards.flat_orientation_l2.weight = -5.0
+        self.rewards.dof_torques_l2.weight = -2.5e-5
+        self.rewards.feet_air_time.weight = 0.5
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+
+@configclass
+class AnymalCFlatEnvCfg_PLAY(AnymalCFlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing event
+        self.events.base_external_force_torque = None
+        self.events.push_robot = None
diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py
new file mode 100644
index 0000000..f9abfd0
--- /dev/null
+++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2024-2025, The UW Lab Project Developers.
+# All Rights Reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from uwlab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_C_CFG # isort: skip # noqa: F401 + + +@configclass +class AnymalCRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # switch robot to anymal-c + self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + +@configclass +class AnymalCRoughEnvCfg_PLAY(AnymalCRoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # spawn the robot randomly in the grid (instead of their terrain levels) + self.scene.terrain.max_init_terrain_level = None + # reduce the number of terrains to save memory + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing event + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/__init__.py new file mode 100644 index 0000000..815b09a --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. 
+## + +gym.register( + id="UW-Velocity-Flat-Spot-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:SpotFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:SpotPPORunnerCfg", + }, +) + +gym.register( + id="UW-Velocity-Rough-Spot-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:SpotRoughEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:SpotPPORunnerCfg", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..704e0d4 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class SpotPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 20000 + save_interval = 50 + experiment_name = "spot_velocity" + empirical_normalization = False + store_code_state = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=0.5, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0025, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py new file mode 100644 index 0000000..8a5f936 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py @@ -0,0 +1,356 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import isaaclab.sim as sim_utils +import isaaclab.terrains as terrain_gen +from isaaclab.envs import ViewerCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg, SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.locomotion.velocity.config.spot.mdp as spot_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from uwlab_assets.robots.spot.spot import SPOT_CFG # isort: skip + + +COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg( + size=(8.0, 8.0), + border_width=20.0, + num_rows=9, + num_cols=21, + horizontal_scale=0.1, + vertical_scale=0.005, + slope_threshold=0.75, + difficulty_range=(0.0, 1.0), + use_cache=False, + sub_terrains={ + "flat": terrain_gen.MeshPlaneTerrainCfg(proportion=0.2), + "random_rough": terrain_gen.HfRandomUniformTerrainCfg( + proportion=0.2, noise_range=(0.02, 0.05), noise_step=0.02, border_width=0.25 + ), + }, +) + + +@configclass +class SpotActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.2, use_default_offset=True) + + +@configclass +class SpotCommandsCfg: + """Command specifications for the MDP.""" + + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.1, + rel_heading_envs=0.0, + heading_command=False, + 
debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0) + ), + ) + + +@configclass +class SpotObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # `` observation terms (order preserved) + base_lin_vel = ObsTerm( + func=mdp.base_lin_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + params={"asset_cfg": SceneEntityCfg("robot")}, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.05, n_max=0.05) + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.5, n_max=0.5) + ) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class SpotEventCfg: + """Configuration for randomization.""" + + # startup + physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.3, 1.0), + "dynamic_friction_range": (0.3, 0.8), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "mass_distribution_params": (-2.5, 2.5), + "operation": "add", + }, + ) + + 
# reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot"), + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-1.5, 1.5), + "y": (-1.0, 1.0), + "z": (-0.5, 0.5), + "roll": (-0.7, 0.7), + "pitch": (-0.7, 0.7), + "yaw": (-1.0, 1.0), + }, + }, + ) + + reset_robot_joints = EventTerm( + func=spot_mdp.reset_joints_around_default, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-2.5, 2.5), + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + # interval + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot"), + "velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}, + }, + ) + + +@configclass +class SpotRewardsCfg: + # -- task + air_time = RewardTermCfg( + func=spot_mdp.air_time_reward, + weight=5.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + base_angular_velocity = RewardTermCfg( + func=spot_mdp.base_angular_velocity_reward, + weight=5.0, + params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + base_linear_velocity = RewardTermCfg( + func=spot_mdp.base_linear_velocity_reward, + weight=5.0, + params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + foot_clearance = RewardTermCfg( + func=spot_mdp.foot_clearance_reward, + weight=0.5, + params={ + "std": 0.05, + "tanh_mult": 2.0, + "target_height": 0.1, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + }, + ) + 
gait = RewardTermCfg( + func=spot_mdp.GaitReward, + weight=10.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + }, + ) + + # -- penalties + action_smoothness = RewardTermCfg(func=spot_mdp.action_smoothness_penalty, weight=-1.0) + air_time_variance = RewardTermCfg( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + base_motion = RewardTermCfg( + func=spot_mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + base_orientation = RewardTermCfg( + func=spot_mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + foot_slip = RewardTermCfg( + func=spot_mdp.foot_slip_penalty, + weight=-0.5, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + "threshold": 1.0, + }, + ) + joint_acc = RewardTermCfg( + func=spot_mdp.joint_acceleration_penalty, + weight=-1.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + joint_pos = RewardTermCfg( + func=spot_mdp.joint_position_penalty, + weight=-0.7, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stand_still_scale": 5.0, + "velocity_threshold": 0.5, + }, + ) + joint_torques = RewardTermCfg( + func=spot_mdp.joint_torques_penalty, + weight=-5.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*")}, + ) + joint_vel = RewardTermCfg( + func=spot_mdp.joint_velocity_penalty, + weight=-1.0e-2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + + +@configclass +class SpotTerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + body_contact = 
DoneTerm( + func=mdp.illegal_contact, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), "threshold": 1.0}, + ) + terrain_out_of_bounds = DoneTerm( + func=mdp.terrain_out_of_bounds, + params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0}, + time_out=True, + ) + + +@configclass +class SpotFlatEnvCfg(LocomotionVelocityRoughEnvCfg): + + # Basic settings' + observations: SpotObservationsCfg = SpotObservationsCfg() + actions: SpotActionsCfg = SpotActionsCfg() + commands: SpotCommandsCfg = SpotCommandsCfg() + + # MDP setting + rewards: SpotRewardsCfg = SpotRewardsCfg() + terminations: SpotTerminationsCfg = SpotTerminationsCfg() + events: SpotEventCfg = SpotEventCfg() + + # Viewer + viewer: ViewerCfg = ViewerCfg(eye=(1.0, 4.0, 1.5), origin_type="asset_body", asset_name="robot", body_name="body") + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # general settings + self.decimation = 10 # 50 Hz + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 0.002 # 500 Hz + self.sim.render_interval = self.decimation + self.sim.disable_contact_processing = True + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 + self.sim.physics_material.friction_combine_mode = "multiply" + self.sim.physics_material.restitution_combine_mode = "multiply" + # update sensor update periods + # we tick all the sensors based on the smallest update period (physics update period) + self.scene.contact_forces.update_period = self.sim.dt + + # switch robot to Spot-d + self.scene.robot = SPOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # terrain + self.scene.terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="generator", + terrain_generator=COBBLESTONE_ROAD_CFG, + max_init_terrain_level=COBBLESTONE_ROAD_CFG.num_rows - 1, + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + 
restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + ), + visual_material=sim_utils.MdlFileCfg( + mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl", + project_uvw=True, + texture_scale=(0.25, 0.25), + ), + debug_vis=True, + ) + + # no height scan + self.scene.height_scanner = None diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/mdp/__init__.py new file mode 100644 index 0000000..8a1ad1b --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/mdp/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the Spot locomotion task.""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/rough_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/rough_env_cfg.py new file mode 100644 index 0000000..74db481 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot/rough_env_cfg.py @@ -0,0 +1,310 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.envs import ViewerCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg, SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.locomotion.velocity.config.spot.mdp as spot_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from uwlab_assets.robots.spot.spot import SPOT_CFG # isort: skip + + +@configclass +class SpotActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.2, use_default_offset=True) + + +@configclass +class SpotCommandsCfg: + """Command specifications for the MDP.""" + + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.1, + rel_heading_envs=0.0, + heading_command=False, + debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0) + ), + ) + + +@configclass +class SpotObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # `` observation terms (order preserved) + base_lin_vel = ObsTerm( + func=mdp.base_lin_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + projected_gravity = 
ObsTerm( + func=mdp.projected_gravity, + params={"asset_cfg": SceneEntityCfg("robot")}, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.05, n_max=0.05) + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.5, n_max=0.5) + ) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class SpotEventCfg: + """Configuration for randomization.""" + + # startup + physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.3, 1.0), + "dynamic_friction_range": (0.3, 0.8), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "mass_distribution_params": (-2.5, 2.5), + "operation": "add", + }, + ) + + # reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot"), + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-1.5, 1.5), + "y": (-1.0, 1.0), + "z": (-0.5, 0.5), + "roll": (-0.7, 0.7), + "pitch": (-0.7, 0.7), + "yaw": (-1.0, 1.0), + }, + }, + ) + + reset_robot_joints = EventTerm( + 
func=spot_mdp.reset_joints_around_default, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-2.5, 2.5), + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + # interval + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot"), + "velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}, + }, + ) + + +@configclass +class SpotRewardsCfg: + # -- task + air_time = RewardTermCfg( + func=spot_mdp.air_time_reward, + weight=5.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + base_angular_velocity = RewardTermCfg( + func=spot_mdp.base_angular_velocity_reward, + weight=5.0, + params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + base_linear_velocity = RewardTermCfg( + func=spot_mdp.base_linear_velocity_reward, + weight=5.0, + params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + foot_clearance = RewardTermCfg( + func=spot_mdp.foot_clearance_reward, + weight=0.5, + params={ + "std": 0.05, + "tanh_mult": 2.0, + "target_height": 0.1, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + }, + ) + gait = RewardTermCfg( + func=spot_mdp.GaitReward, + weight=10.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + }, + ) + + # -- penalties + action_smoothness = RewardTermCfg(func=spot_mdp.action_smoothness_penalty, weight=-1.0) + air_time_variance = RewardTermCfg( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + base_motion = RewardTermCfg( + 
    # Basic settings
        # switch robot to Spot (no arm)
        self.scene.robot = SPOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+## + +gym.register( + id="UW-Velocity-Flat-Arm-Spot-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:SpotFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:SpotFlatPPORunnerCfg", + }, +) + +gym.register( + id="UW-Velocity-Rough-Arm-Spot-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:SpotRoughEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:SpotFlatPPORunnerCfg", + }, +) + +gym.register( + id="UW-Velocity-Flat-Arm-Spot-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:SpotFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:SpotFlatPPORunnerCfg", + }, +) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/__init__.py new file mode 100644 index 0000000..ac860d6 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/rsl_rl_ppo_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000..37c58aa --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 20000 + save_interval = 50 + experiment_name = "spot_flat" + empirical_normalization = False + store_code_state = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=0.5, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.0025, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/flat_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/flat_env_cfg.py new file mode 100644 index 0000000..e585352 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/flat_env_cfg.py @@ -0,0 +1,390 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import isaaclab.sim as sim_utils +import isaaclab.terrains as terrain_gen +from isaaclab.envs import ViewerCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg, SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise +from uwlab.envs.mdp import DefaultJointPositionStaticActionCfg + +import isaaclab_tasks.manager_based.locomotion.velocity.config.spot.mdp as spot_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from uwlab_assets.robots.spot.arm_spot import SPOT_WITH_ARM_CFG # isort: skip + + +COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg( + size=(8.0, 8.0), + border_width=20.0, + num_rows=9, + num_cols=21, + horizontal_scale=0.1, + vertical_scale=0.005, + slope_threshold=0.75, + difficulty_range=(0.0, 1.0), + use_cache=False, + sub_terrains={ + "flat": terrain_gen.MeshPlaneTerrainCfg(proportion=0.2), + "random_rough": terrain_gen.HfRandomUniformTerrainCfg( + proportion=0.2, noise_range=(0.02, 0.05), noise_step=0.02, border_width=0.25 + ), + }, +) + + +@configclass +class SpotActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=["^(?!.*arm0).*$"], scale=0.2, use_default_offset=True + ) + + arm_pos = DefaultJointPositionStaticActionCfg( + asset_name="robot", joint_names=["arm0.*"], scale=1, use_default_offset=True + ) + + +@configclass +class SpotCommandsCfg: + 
"""Command specifications for the MDP.""" + + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.1, + rel_heading_envs=0.0, + heading_command=False, + debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0) + ), + ) + + +@configclass +class SpotObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # `` observation terms (order preserved) + base_lin_vel = ObsTerm( + func=mdp.base_lin_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + params={"asset_cfg": SceneEntityCfg("robot")}, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.05, n_max=0.05) + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.5, n_max=0.5) + ) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class SpotEventCfg: + """Configuration for randomization.""" + + # startup + physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.3, 1.0), + "dynamic_friction_range": (0.3, 0.8), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + 
add_base_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "mass_distribution_params": (-2.5, 2.5), + "operation": "add", + }, + ) + + # reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot"), + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-1.5, 1.5), + "y": (-1.0, 1.0), + "z": (-0.5, 0.5), + "roll": (-0.7, 0.7), + "pitch": (-0.7, 0.7), + "yaw": (-1.0, 1.0), + }, + }, + ) + + reset_robot_joints = EventTerm( + func=spot_mdp.reset_joints_around_default, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-2.5, 2.5), + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + # interval + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot"), + "velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}, + }, + ) + + +@configclass +class SpotRewardsCfg: + # -- task + air_time = RewardTermCfg( + func=spot_mdp.air_time_reward, + weight=5.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + base_angular_velocity = RewardTermCfg( + func=spot_mdp.base_angular_velocity_reward, + weight=5.0, + params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + base_linear_velocity = RewardTermCfg( + func=spot_mdp.base_linear_velocity_reward, + weight=5.0, + params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + 
foot_clearance = RewardTermCfg( + func=spot_mdp.foot_clearance_reward, + weight=0.5, + params={ + "std": 0.05, + "tanh_mult": 2.0, + "target_height": 0.1, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + }, + ) + gait = RewardTermCfg( + func=spot_mdp.GaitReward, + weight=10.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + }, + ) + + # -- penalties + action_smoothness = RewardTermCfg(func=spot_mdp.action_smoothness_penalty, weight=-1.0) + air_time_variance = RewardTermCfg( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + base_motion = RewardTermCfg( + func=spot_mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + base_orientation = RewardTermCfg( + func=spot_mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + foot_slip = RewardTermCfg( + func=spot_mdp.foot_slip_penalty, + weight=-0.5, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + "threshold": 1.0, + }, + ) + joint_acc = RewardTermCfg( + func=spot_mdp.joint_acceleration_penalty, + weight=-1.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + joint_pos = RewardTermCfg( + func=spot_mdp.joint_position_penalty, + weight=-0.7, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stand_still_scale": 5.0, + "velocity_threshold": 0.5, + }, + ) + joint_torques = RewardTermCfg( + func=spot_mdp.joint_torques_penalty, + weight=-5.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*")}, + ) + joint_vel = RewardTermCfg( + func=spot_mdp.joint_velocity_penalty, + weight=-1.0e-2, + 
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + + +@configclass +class SpotTerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + body_contact = DoneTerm( + func=mdp.illegal_contact, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg", "arm0.*"]), + "threshold": 1.0, + }, + ) + terrain_out_of_bounds = DoneTerm( + func=mdp.terrain_out_of_bounds, + params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0}, + time_out=True, + ) + + +@configclass +class SpotFlatEnvCfg(LocomotionVelocityRoughEnvCfg): + + # Basic settings' + observations: SpotObservationsCfg = SpotObservationsCfg() + actions: SpotActionsCfg = SpotActionsCfg() + commands: SpotCommandsCfg = SpotCommandsCfg() + + # MDP setting + rewards: SpotRewardsCfg = SpotRewardsCfg() + terminations: SpotTerminationsCfg = SpotTerminationsCfg() + events: SpotEventCfg = SpotEventCfg() + + # Viewer + viewer: ViewerCfg = ViewerCfg(eye=(1.0, 4.0, 1.5), origin_type="asset_body", asset_name="robot", body_name="body") + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # general settings + self.decimation = 10 # 50 Hz + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 0.002 # 500 Hz + self.sim.render_interval = self.decimation + self.sim.disable_contact_processing = True + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 + self.sim.physics_material.friction_combine_mode = "multiply" + self.sim.physics_material.restitution_combine_mode = "multiply" + # update sensor update periods + # we tick all the sensors based on the smallest update period (physics update period) + self.scene.contact_forces.update_period = self.sim.dt + + # switch robot to Spot-d + self.scene.robot = SPOT_WITH_ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # terrain + self.scene.terrain = TerrainImporterCfg( + 
prim_path="/World/ground", + terrain_type="generator", + terrain_generator=COBBLESTONE_ROAD_CFG, + max_init_terrain_level=COBBLESTONE_ROAD_CFG.num_rows - 1, + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + ), + visual_material=sim_utils.MdlFileCfg( + mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl", + project_uvw=True, + texture_scale=(0.25, 0.25), + ), + debug_vis=True, + ) + + # no height scan + self.scene.height_scanner = None + + +class SpotFlatEnvCfg_PLAY(SpotFlatEnvCfg): + def __post_init__(self) -> None: + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 1 + self.scene.env_spacing = 2.5 + # spawn the robot randomly in the grid (instead of their terrain levels) + self.scene.terrain.max_init_terrain_level = None + + # reduce the number of terrains to save memory + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing event + # self.events.base_external_force_torque = None + # self.events.push_robot = None diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/mdp/__init__.py new file mode 100644 index 0000000..8a1ad1b --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/mdp/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the Spot locomotion task.""" diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/rough_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/rough_env_cfg.py new file mode 100644 index 0000000..2c950ab --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/config/spot_with_arm/rough_env_cfg.py @@ -0,0 +1,320 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.envs import ViewerCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg, SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise +from uwlab.envs.mdp import DefaultJointPositionStaticActionCfg + +import isaaclab_tasks.manager_based.locomotion.velocity.config.spot.mdp as spot_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from uwlab_assets.robots.spot.arm_spot import SPOT_WITH_ARM_CFG # isort: skip + + +@configclass +class SpotActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=["^(?!.*arm0).*$"], scale=0.2, use_default_offset=True + ) + + arm_pos = DefaultJointPositionStaticActionCfg( + asset_name="robot", joint_names=["arm0.*"], scale=1, use_default_offset=True + ) + + +@configclass +class SpotCommandsCfg: + """Command specifications for the 
MDP.""" + + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.1, + rel_heading_envs=0.0, + heading_command=False, + debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0) + ), + ) + + +@configclass +class SpotObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # `` observation terms (order preserved) + base_lin_vel = ObsTerm( + func=mdp.base_lin_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1) + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + params={"asset_cfg": SceneEntityCfg("robot")}, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.05, n_max=0.05) + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.5, n_max=0.5) + ) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class SpotEventCfg: + """Configuration for randomization.""" + + # startup + physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.3, 1.0), + "dynamic_friction_range": (0.3, 0.8), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass = EventTerm( + 
func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "mass_distribution_params": (-2.5, 2.5), + "operation": "add", + }, + ) + + # reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="body"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot"), + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-1.5, 1.5), + "y": (-1.0, 1.0), + "z": (-0.5, 0.5), + "roll": (-0.7, 0.7), + "pitch": (-0.7, 0.7), + "yaw": (-1.0, 1.0), + }, + }, + ) + + reset_robot_joints = EventTerm( + func=spot_mdp.reset_joints_around_default, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-2.5, 2.5), + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + # interval + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot"), + "velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}, + }, + ) + + +@configclass +class SpotRewardsCfg: + # -- task + air_time = RewardTermCfg( + func=spot_mdp.air_time_reward, + weight=5.0, + params={ + "mode_time": 0.3, + "velocity_threshold": 0.5, + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + }, + ) + base_angular_velocity = RewardTermCfg( + func=spot_mdp.base_angular_velocity_reward, + weight=5.0, + params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + base_linear_velocity = RewardTermCfg( + func=spot_mdp.base_linear_velocity_reward, + weight=5.0, + params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")}, + ) + foot_clearance = 
RewardTermCfg( + func=spot_mdp.foot_clearance_reward, + weight=0.5, + params={ + "std": 0.05, + "tanh_mult": 2.0, + "target_height": 0.1, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + }, + ) + gait = RewardTermCfg( + func=spot_mdp.GaitReward, + weight=10.0, + params={ + "std": 0.1, + "max_err": 0.2, + "velocity_threshold": 0.5, + "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")), + "asset_cfg": SceneEntityCfg("robot"), + "sensor_cfg": SceneEntityCfg("contact_forces"), + }, + ) + + # -- penalties + action_smoothness = RewardTermCfg(func=spot_mdp.action_smoothness_penalty, weight=-1.0) + air_time_variance = RewardTermCfg( + func=spot_mdp.air_time_variance_penalty, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")}, + ) + base_motion = RewardTermCfg( + func=spot_mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + base_orientation = RewardTermCfg( + func=spot_mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")} + ) + foot_slip = RewardTermCfg( + func=spot_mdp.foot_slip_penalty, + weight=-0.5, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"), + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"), + "threshold": 1.0, + }, + ) + joint_acc = RewardTermCfg( + func=spot_mdp.joint_acceleration_penalty, + weight=-1.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + joint_pos = RewardTermCfg( + func=spot_mdp.joint_position_penalty, + weight=-0.7, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "stand_still_scale": 5.0, + "velocity_threshold": 0.5, + }, + ) + joint_torques = RewardTermCfg( + func=spot_mdp.joint_torques_penalty, + weight=-5.0e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*")}, + ) + joint_vel = RewardTermCfg( + func=spot_mdp.joint_velocity_penalty, + weight=-1.0e-2, + params={"asset_cfg": 
SceneEntityCfg("robot", joint_names=".*_h[xy]")}, + ) + + +@configclass +class SpotTerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + body_contact = DoneTerm( + func=mdp.illegal_contact, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg", "arm0.*"]), + "threshold": 1.0, + }, + ) + terrain_out_of_bounds = DoneTerm( + func=mdp.terrain_out_of_bounds, + params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0}, + time_out=True, + ) + + +@configclass +class SpotRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + + # Basic settings' + observations: SpotObservationsCfg = SpotObservationsCfg() + actions: SpotActionsCfg = SpotActionsCfg() + commands: SpotCommandsCfg = SpotCommandsCfg() + + # MDP setting + rewards: SpotRewardsCfg = SpotRewardsCfg() + terminations: SpotTerminationsCfg = SpotTerminationsCfg() + events: SpotEventCfg = SpotEventCfg() + + # Viewer + viewer: ViewerCfg = ViewerCfg(eye=(1.0, 4.0, 1.5), origin_type="asset_body", asset_name="robot", body_name="body") + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # general settings + self.decimation = 10 # 50 Hz + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 0.002 # 500 Hz + self.sim.render_interval = self.decimation + self.sim.disable_contact_processing = True + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 + self.sim.physics_material.friction_combine_mode = "multiply" + self.sim.physics_material.restitution_combine_mode = "multiply" + # update sensor update periods + # we tick all the sensors based on the smallest update period (physics update period) + self.scene.contact_forces.update_period = self.sim.dt + + # switch robot to Spot-With-Arm + self.scene.robot = SPOT_WITH_ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/body" diff --git 
a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/__init__.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/__init__.py new file mode 100644 index 0000000..1b5fe13 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the locomotion environments.""" + +from isaaclab.envs.mdp import * +from uwlab.envs.mdp import * # noqa: F401, F403 + +from .curriculums import * # noqa: F401, F403 +from .rewards import * # noqa: F401, F403 diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py new file mode 100644 index 0000000..1a29e24 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py @@ -0,0 +1,55 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Common functions that can be used to create curriculum for the learning environment. + +The functions can be passed to the :class:`isaaclab.managers.CurriculumTermCfg` object to enable +the curriculum introduced by the function. 
+""" + +from __future__ import annotations + +import torch +from collections.abc import Sequence +from typing import TYPE_CHECKING + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.terrains import TerrainImporter + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def terrain_levels_vel( + env: ManagerBasedRLEnv, env_ids: Sequence[int], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> torch.Tensor: + """Curriculum based on the distance the robot walked when commanded to move at a desired velocity. + + This term is used to increase the difficulty of the terrain when the robot walks far enough and decrease the + difficulty when the robot walks less than half of the distance required by the commanded velocity. + + .. note:: + It is only possible to use this term with the terrain type ``generator``. For further information + on different terrain types, check the :class:`isaaclab.terrains.TerrainImporter` class. + + Returns: + The mean terrain level for the given environment ids. 
+ """ + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + terrain: TerrainImporter = env.scene.terrain + command = env.command_manager.get_command("base_velocity") + # compute the distance the robot walked + distance = torch.norm(asset.data.root_pos_w[env_ids, :2] - env.scene.env_origins[env_ids, :2], dim=1) + # robots that walked far enough progress to harder terrains + move_up = distance > terrain.cfg.terrain_generator.size[0] / 2 + # robots that walked less than half of their required distance go to simpler terrains + move_down = distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5 + move_down *= ~move_up + # update terrain levels + terrain.update_env_origins(env_ids, move_up, move_down) + # return the mean terrain level + return torch.mean(terrain.terrain_levels.float()) diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/rewards.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/rewards.py new file mode 100644 index 0000000..1a68d14 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/mdp/rewards.py @@ -0,0 +1,93 @@ +# Copyright (c) 2024-2025, The UW Lab Project Developers. +# All Rights Reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +from isaaclab.managers import SceneEntityCfg +from isaaclab.sensors import ContactSensor +from isaaclab.utils.math import quat_rotate_inverse, yaw_quat + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def feet_air_time( + env: ManagerBasedRLEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold: float +) -> torch.Tensor: + """Reward long steps taken by the feet using L2-kernel. + + This function rewards the agent for taking steps that are longer than a threshold. 
This helps ensure
    that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
    the time for which the feet are in the air.

    If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
    """
    # extract the used quantities (to enable type-hinting)
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    # compute the reward: feet that touched down during this step contribute their just-ended
    # air time minus the threshold (negative if the step was too short)
    first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
    last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
    reward = torch.sum((last_air_time - threshold) * first_contact, dim=1)
    # no reward for zero command (planar command magnitude below 0.1)
    reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
    return reward


def feet_air_time_positive_biped(env, command_name: str, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
    """Reward long steps taken by the feet for bipeds.

    This function rewards the agent for taking steps up to a specified threshold and also keeps one foot at
    a time in the air.

    If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
    """
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    # compute the reward
    air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
    contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
    in_contact = contact_time > 0.0
    # per-foot duration of the current mode (stance time if in contact, swing time otherwise)
    in_mode_time = torch.where(in_contact, contact_time, air_time)
    # reward only while exactly one foot is in contact (single-stance phase)
    single_stance = torch.sum(in_contact.int(), dim=1) == 1
    reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
    # cap the reward so the agent has no incentive to hold a phase beyond the threshold
    reward = torch.clamp(reward, max=threshold)
    # no reward for zero command
    reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
    return reward


def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize feet sliding: planar foot speed summed over feet that are in firm contact."""
    # Penalize feet sliding
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    # a foot counts as "in contact" if any recent history sample shows net force above 1 N
    contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
    asset = env.scene[asset_cfg.name]
    body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
    reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
    return reward


def track_lin_vel_xy_yaw_frame_exp(
    env, std: float, command_name: str, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
    """Reward tracking of linear velocity commands (xy axes) in the gravity aligned robot frame using exponential kernel."""
    # extract the used quantities (to enable type-hinting)
    asset = env.scene[asset_cfg.name]
    # rotate the world-frame velocity into the yaw-only (gravity-aligned) robot frame
    vel_yaw = quat_rotate_inverse(yaw_quat(asset.data.root_quat_w), asset.data.root_lin_vel_w[:, :3])
    lin_vel_error = torch.sum(
        torch.square(env.command_manager.get_command(command_name)[:, :2] - vel_yaw[:, :2]), dim=1
    )
    # exponential kernel: reward in (0, 1], widest when std is large
    return torch.exp(-lin_vel_error / std**2)


def track_ang_vel_z_world_exp(
    env, command_name: str, std: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
    """Reward tracking of angular velocity commands (yaw) in world frame using exponential kernel."""
    # extract the used quantities (to enable type-hinting)
    asset = env.scene[asset_cfg.name]
    ang_vel_error = torch.square(env.command_manager.get_command(command_name)[:, 2] - asset.data.root_ang_vel_w[:, 2])
    return torch.exp(-ang_vel_error / std**2)
diff --git a/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/velocity_env_cfg.py b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/velocity_env_cfg.py new file mode 100644 index 0000000..e1fcfd2 --- /dev/null +++ b/source/uwlab_tasks/uwlab_tasks/manager_based/locomotion/velocity/velocity_env_cfg.py
# Copyright (c) 2024-2025, The UW Lab Project Developers.
# All Rights Reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import math
from dataclasses import MISSING

import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, AssetBaseCfg
from isaaclab.envs import ManagerBasedRLEnvCfg
from isaaclab.managers import CurriculumTermCfg as CurrTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.sensors import ContactSensorCfg, RayCasterCfg, patterns
from isaaclab.utils import configclass
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR
from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
from uwlab.scene import InteractiveSceneCfg
from uwlab.terrains import TerrainImporterCfg

# NOTE(review): this file lives in uwlab_tasks but imports mdp from isaaclab_tasks, while this
# change set also adds uwlab_tasks...velocity/mdp (curriculums/rewards) — confirm which module is intended.
import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp

from uwlab.terrains.config.rough import ROUGH_TERRAINS_CFG  # isort: skip


@configclass
class MySceneCfg(InteractiveSceneCfg):
    """Configuration for the 
terrain scene with a legged robot."""

    # ground terrain: procedurally generated rough terrain with a marble-tile visual material
    terrain = TerrainImporterCfg(
        prim_path="/World/ground",
        terrain_type="generator",
        terrain_generator=ROUGH_TERRAINS_CFG,
        max_init_terrain_level=5,
        collision_group=-1,
        physics_material=sim_utils.RigidBodyMaterialCfg(
            friction_combine_mode="multiply",
            restitution_combine_mode="multiply",
            static_friction=1.0,
            dynamic_friction=1.0,
        ),
        visual_material=sim_utils.MdlFileCfg(
            mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl",
            project_uvw=True,
            texture_scale=(0.25, 0.25),
        ),
        debug_vis=False,
    )

    # robot articulation: intentionally left MISSING; a robot-specific subclass must set it
    robot: ArticulationCfg = MISSING
    # sensors
    # height scanner: ray caster dropped from 20 m above the base, yaw-aligned 1.6 m x 1.0 m grid
    height_scanner = RayCasterCfg(
        prim_path="{ENV_REGEX_NS}/Robot/base",
        offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 20.0)),
        attach_yaw_only=True,
        pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=(1.6, 1.0)),
        debug_vis=False,
        mesh_prim_paths=["/World/ground"],
    )
    # contact sensor on every robot link, keeping a 3-sample force history and air-time tracking
    contact_forces = ContactSensorCfg(prim_path="{ENV_REGEX_NS}/Robot/.*", history_length=3, track_air_time=True)
    # lights
    sky_light = AssetBaseCfg(
        prim_path="/World/skyLight",
        spawn=sim_utils.DomeLightCfg(
            intensity=750.0,
            texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr",
        ),
    )


##
# MDP settings
##


@configclass
class CommandsCfg:
    """Command specifications for the MDP."""

    # uniform planar velocity command, resampled every 10 s; heading mode converts a sampled
    # heading into a yaw-rate command with the given proportional stiffness
    base_velocity = mdp.UniformVelocityCommandCfg(
        asset_name="robot",
        resampling_time_range=(10.0, 10.0),
        rel_standing_envs=0.02,
        rel_heading_envs=1.0,
        heading_command=True,
        heading_control_stiffness=0.5,
        debug_vis=True,
        ranges=mdp.UniformVelocityCommandCfg.Ranges(
            lin_vel_x=(-1.0, 1.0), lin_vel_y=(-1.0, 1.0), ang_vel_z=(-1.0, 1.0), heading=(-math.pi, math.pi)
        ),
    )


@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    # joint position targets for all joints, scaled by 0.5 about the default joint offsets
    joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True)


@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""

    @configclass
    class PolicyCfg(ObsGroup):
        """Observations for policy group."""

        # observation terms (order preserved)
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1))
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel, noise=Unoise(n_min=-0.2, n_max=0.2))
        projected_gravity = ObsTerm(
            func=mdp.projected_gravity,
            noise=Unoise(n_min=-0.05, n_max=0.05),
        )
        velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"})
        joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
        joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-1.5, n_max=1.5))
        actions = ObsTerm(func=mdp.last_action)
        # terrain height samples, clipped to keep out-of-range readings bounded
        height_scan = ObsTerm(
            func=mdp.height_scan,
            params={"sensor_cfg": SceneEntityCfg("height_scanner")},
            noise=Unoise(n_min=-0.1, n_max=0.1),
            clip=(-1.0, 1.0),
        )

        def __post_init__(self):
            # apply the per-term noise models and flatten all terms into a single vector
            self.enable_corruption = True
            self.concatenate_terms = True

    # observation groups
    policy: PolicyCfg = PolicyCfg()


@configclass
class EventCfg:
    """Configuration for events."""

    # startup
    physics_material = EventTerm(
        func=mdp.randomize_rigid_body_material,
        mode="startup",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
            # fixed values here (degenerate ranges) — bucketed sampling still applies
            "static_friction_range": (0.8, 0.8),
            "dynamic_friction_range": (0.6, 0.6),
            "restitution_range": (0.0, 0.0),
            "num_buckets": 64,
        },
    )

    # randomize base mass by an additive offset in [-5, 5] kg — TODO confirm units match asset
    add_base_mass = EventTerm(
        func=mdp.randomize_rigid_body_mass,
        mode="startup",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names="base"),
            "mass_distribution_params": (-5.0, 5.0),
            "operation": "add",
        },
    )

    # reset
    # zero-magnitude wrench: keeps the term wired in so subclasses can widen the ranges
    base_external_force_torque = EventTerm(
        func=mdp.apply_external_force_torque,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names="base"),
            "force_range": (0.0, 0.0),
            "torque_range": (-0.0, 0.0),
        },
    )

    reset_base = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
            "velocity_range": {
                "x": (-0.5, 0.5),
                "y": (-0.5, 0.5),
                "z": (-0.5, 0.5),
                "roll": (-0.5, 0.5),
                "pitch": (-0.5, 0.5),
                "yaw": (-0.5, 0.5),
            },
        },
    )

    # scale default joint positions by [0.5, 1.5]; joint velocities reset to zero
    reset_robot_joints = EventTerm(
        func=mdp.reset_joints_by_scale,
        mode="reset",
        params={
            "position_range": (0.5, 1.5),
            "velocity_range": (0.0, 0.0),
        },
    )

    # interval
    # push the robot by overwriting its planar velocity every 10-15 s
    push_robot = EventTerm(
        func=mdp.push_by_setting_velocity,
        mode="interval",
        interval_range_s=(10.0, 15.0),
        params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}},
    )


@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

    # -- task
    track_lin_vel_xy_exp = RewTerm(
        func=mdp.track_lin_vel_xy_exp, weight=1.0, params={"command_name": "base_velocity", "std": math.sqrt(0.25)}
    )
    track_ang_vel_z_exp = RewTerm(
        func=mdp.track_ang_vel_z_exp, weight=0.5, params={"command_name": "base_velocity", "std": math.sqrt(0.25)}
    )
    # -- penalties
    lin_vel_z_l2 = RewTerm(func=mdp.lin_vel_z_l2, weight=-2.0)
    ang_vel_xy_l2 = RewTerm(func=mdp.ang_vel_xy_l2, weight=-0.05)
    dof_torques_l2 = RewTerm(func=mdp.joint_torques_l2, weight=-1.0e-5)
    dof_acc_l2 = RewTerm(func=mdp.joint_acc_l2, weight=-2.5e-7)
    action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
    # body-name regexes (".*FOOT", ".*THIGH") follow the ANYmal convention — confirm for other robots
    feet_air_time = RewTerm(
        func=mdp.feet_air_time,
        weight=0.125,
        params={
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*FOOT"),
            "command_name": "base_velocity",
            "threshold": 0.5,
        },
    )
    undesired_contacts = RewTerm(
        func=mdp.undesired_contacts,
        weight=-1.0,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0},
    )
    # -- optional penalties (weight 0.0 keeps them disabled until a subclass opts in)
    flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=0.0)
    dof_pos_limits = 
RewTerm(func=mdp.joint_pos_limits, weight=0.0)


@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""

    # episode timeout (not treated as a failure by the RL algorithm)
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
    # terminate when the base link experiences contact force above the threshold
    base_contact = DoneTerm(
        func=mdp.illegal_contact,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="base"), "threshold": 1.0},
    )


@configclass
class CurriculumCfg:
    """Curriculum terms for the MDP."""

    # advance/regress terrain difficulty based on how far each robot walked (mdp.terrain_levels_vel)
    terrain_levels = CurrTerm(func=mdp.terrain_levels_vel)


##
# Environment configuration
##


@configclass
class LocomotionVelocityRoughEnvCfg(ManagerBasedRLEnvCfg):
    """Configuration for the locomotion velocity-tracking environment."""

    # Scene settings
    scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=2.5)
    # Basic settings
    observations: ObservationsCfg = ObservationsCfg()
    actions: ActionsCfg = ActionsCfg()
    commands: CommandsCfg = CommandsCfg()
    # MDP settings
    rewards: RewardsCfg = RewardsCfg()
    terminations: TerminationsCfg = TerminationsCfg()
    events: EventCfg = EventCfg()
    curriculum: CurriculumCfg = CurriculumCfg()

    def __post_init__(self):
        """Post initialization."""
        # general settings
        self.decimation = 4  # control at 50 Hz (sim dt 0.005 s x 4)
        self.episode_length_s = 20.0
        # simulation settings
        self.sim.dt = 0.005  # physics at 200 Hz
        self.sim.render_interval = self.decimation
        self.sim.disable_contact_processing = True
        # share the terrain's friction/restitution settings with the global physics material
        self.sim.physics_material = self.scene.terrain.physics_material
        # enlarge PhysX GPU pair capacities for the 4096-env default
        self.sim.physx.gpu_total_aggregate_pairs_capacity = 2**24
        self.sim.physx.gpu_found_lost_pairs_capacity = 2**24
        self.viewer.eye = (0.0, 0.0, 80)
        self.viewer.resolution = (1920, 1080)
        # update sensor update periods
        # we tick all the sensors based on the smallest update period (physics update period)
        if self.scene.height_scanner is not None:
            self.scene.height_scanner.update_period = self.decimation * self.sim.dt
        if self.scene.contact_forces is not None:
            self.scene.contact_forces.update_period = self.sim.dt

        # check if terrain levels curriculum is 
enabled - if so, enable curriculum for terrain generator
        # this generates terrains with increasing difficulty and is useful for training
        if getattr(self.curriculum, "terrain_levels", None) is not None:
            if self.scene.terrain.terrain_generator is not None:
                self.scene.terrain.terrain_generator.curriculum = True
        else:
            if self.scene.terrain.terrain_generator is not None:
                self.scene.terrain.terrain_generator.curriculum = False