Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions conf/appo/task/go1_joystick_flat/motrix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @package _global_
# APPO training overrides for the Go1JoystickFlat task on the motrix backend
# (conf/appo/task/go1_joystick_flat/motrix.yaml).
# NOTE(review): key nesting is not visible in this view — confirm the
# hierarchy against the checked-in file before relying on these comments.
training:
task_name: Go1JoystickFlat
sim_backend: motrix
algo:
num_envs: 1024
steps_per_env: 24
max_iterations: 300
actor:
distribution_cfg:
init_std: 0.5
algorithm:
learning_rate: 5.0e-4
entropy_coef: 1.0e-3
desired_kl: 0.008
env:
# presumably the simulation step in seconds — TODO confirm units
sim_dt: 0.01
commands:
# Both rows are identical; presumably these are lower/upper command bounds,
# which would pin the command to a constant value — TODO confirm intent.
vel_limit:
- [0.5, 0.0, 0.0]
- [0.5, 0.0, 0.0]
reward:
# Per-term weights keyed by reward name; the Go1 joystick env maps these
# names to reward functions in _init_reward_functions.
scales:
tracking_lin_vel: 1.0
tracking_ang_vel: 0.2
lin_vel_z: -5.0
ang_vel_xy: -0.1
base_height: -100.0
action_rate: -0.015
# Weight for rewards.action_smooth (squared second-order action difference).
action_smooth: -0.01
similar_to_default: -0.15
swing_feet_z: 2.0
tracking_sigma: 0.25
base_height_target: 0.3
2 changes: 1 addition & 1 deletion docs/sphinx/source/zh_CN/5-reference/5-support_matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ uv run scripts/generate_support_matrix.py --write
| PPO (mlx) | `go2w_joystick_flat` (go2w joystick flat) | Configured | Configured |
| PPO (mlx) | `go2w_joystick_rough` (go2w joystick rough) | Configured | Configured |
| PPO (mlx) | `stewart_balance` (stewart balance) | Configured | Configured |
| APPO (torch) | `go1_joystick_flat` (Go1 joystick) | Tested | Registered |
| APPO (torch) | `go1_joystick_flat` (Go1 joystick) | Tested | Tested |
| APPO (torch) | `go2_joystick_flat` (Go2 joystick) | Tested | Tested |
| APPO (torch) | `g1_walk_flat` (G1 walk flat) | Tested | Registered |
| APPO (torch) | `g1_motion_tracking` (G1 motion tracking) | Tested | Tested |
Expand Down
8 changes: 8 additions & 0 deletions src/unilab/envs/locomotion/common/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ def action_rate(ctx: RewardContext) -> np.ndarray:
return np.sum(np.square(current - last), axis=1) # type: ignore[no-any-return]


def action_smooth(ctx: RewardContext) -> np.ndarray:
    """Return the squared second-order action difference, summed over axis 1.

    Reads ``"current_actions"`` and ``"last_actions"`` from ``ctx.info`` and
    evaluates ``current - 2 * last + previous`` element-wise. When
    ``"previous_actions"`` is not present in ``ctx.info`` the value of
    ``"last_actions"`` is used in its place, so the expression reduces to
    ``current - last``.
    """
    info = ctx.info
    a_now = info["current_actions"]
    a_prev = info["last_actions"]
    # Fall back to the most recent action when no older one was recorded.
    a_prev2 = info.get("previous_actions", a_prev)
    second_diff = a_now - 2.0 * a_prev + a_prev2
    return np.sum(np.square(second_diff), axis=1)  # type: ignore[no-any-return]


# ── effort penalties ─────────────────────────────────────────────────


Expand Down
18 changes: 18 additions & 0 deletions src/unilab/envs/locomotion/go1/joystick.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def _init_reward_functions(self):
"ang_vel_xy": rewards.ang_vel_xy,
"base_height": rewards.base_height,
"action_rate": rewards.action_rate,
"action_smooth": rewards.action_smooth,
"similar_to_default": rewards.similar_to_default,
"contact": self._reward_contact,
"swing_feet_z": self._reward_swing_feet_z,
}

Expand Down Expand Up @@ -239,3 +241,19 @@ def _reward_swing_feet_z(self, ctx: RewardContext) -> np.ndarray:
swing_rew = np.exp(-height_error / 0.01) * is_swing
reward: np.ndarray = np.sum(swing_rew, axis=1) / len(self._cfg.sensor.feet_pos)
return reward

def apply_action(self, actions: np.ndarray, state: NpEnvState) -> np.ndarray:
    """Roll the action history in ``state.info`` and return control targets.

    The history is shifted by one step: the stored ``"last_actions"`` becomes
    ``"previous_actions"``, the stored ``"current_actions"`` becomes
    ``"last_actions"``, and ``actions`` is stored as ``"current_actions"``.
    Entries that are missing default to zeros shaped like ``actions``.

    The returned array is the executed action multiplied by
    ``control_config.action_scale`` plus ``self.default_angles``. The executed
    action is the new ``"last_actions"`` entry when
    ``control_config.simulate_action_latency`` is truthy, otherwise ``actions``.
    """
    info = state.info
    # Separate zero defaults so the two history slots never share one array.
    newer = info.get("current_actions", np.zeros_like(actions))
    older = info.get("last_actions", np.zeros_like(actions))

    info["previous_actions"] = older
    info["last_actions"] = newer
    info["current_actions"] = actions

    if self._cfg.control_config.simulate_action_latency:
        executed = info["last_actions"]
    else:
        executed = actions

    targets: np.ndarray = (
        executed * self._cfg.control_config.action_scale + self.default_angles
    )
    return targets
4 changes: 2 additions & 2 deletions tests/scripts/test_support_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ def test_support_matrix_marks_go2_ppo_backends_as_tested():
assert row.cells["motrix"].level == EvidenceLevel.TESTED


def test_support_matrix_marks_appo_go1_motrix_as_registered_only():
def test_support_matrix_marks_appo_go1_backends_as_tested():
row = _row("APPO (torch)", "go1_joystick_flat")

assert row.cells["mujoco"].level == EvidenceLevel.TESTED
assert row.cells["motrix"].level == EvidenceLevel.REGISTERED
assert row.cells["motrix"].level == EvidenceLevel.TESTED


def test_support_matrix_keeps_uncovered_mlx_tasks_at_configured():
Expand Down
Loading