From 9c20ea33f67cdf6fafb7ae699a4008637b08768f Mon Sep 17 00:00:00 2001
From: Xin Ma <1285953028@qq.com>
Date: Thu, 18 Jun 2026 18:37:48 +0800
Subject: [PATCH] fix: smooth APPO Go1 joystick actions

---
NOTE(review): the line structure of this patch was reconstructed from a copy
whose newlines had been collapsed. Hunk line counts were checked against the
@@ headers and the diffstat, but leading indentation (YAML nesting, Python
context lines) is inferred -- confirm against the original commit before
applying.

 conf/appo/task/go1_joystick_flat/motrix.yaml  | 34 +++++++++++++++++++
 .../zh_CN/5-reference/5-support_matrix.md     |  2 +-
 src/unilab/envs/locomotion/common/rewards.py  |  8 +++++
 src/unilab/envs/locomotion/go1/joystick.py    | 18 ++++++++++
 tests/scripts/test_support_matrix.py          |  4 +--
 5 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 conf/appo/task/go1_joystick_flat/motrix.yaml

diff --git a/conf/appo/task/go1_joystick_flat/motrix.yaml b/conf/appo/task/go1_joystick_flat/motrix.yaml
new file mode 100644
index 000000000..f7343fd5d
--- /dev/null
+++ b/conf/appo/task/go1_joystick_flat/motrix.yaml
@@ -0,0 +1,34 @@
+# @package _global_
+training:
+  task_name: Go1JoystickFlat
+  sim_backend: motrix
+algo:
+  num_envs: 1024
+  steps_per_env: 24
+  max_iterations: 300
+  actor:
+    distribution_cfg:
+      init_std: 0.5
+  algorithm:
+    learning_rate: 5.0e-4
+    entropy_coef: 1.0e-3
+    desired_kl: 0.008
+env:
+  sim_dt: 0.01
+  commands:
+    vel_limit:
+      - [0.5, 0.0, 0.0]
+      - [0.5, 0.0, 0.0]
+reward:
+  scales:
+    tracking_lin_vel: 1.0
+    tracking_ang_vel: 0.2
+    lin_vel_z: -5.0
+    ang_vel_xy: -0.1
+    base_height: -100.0
+    action_rate: -0.015
+    action_smooth: -0.01
+    similar_to_default: -0.15
+    swing_feet_z: 2.0
+  tracking_sigma: 0.25
+  base_height_target: 0.3
diff --git a/docs/sphinx/source/zh_CN/5-reference/5-support_matrix.md b/docs/sphinx/source/zh_CN/5-reference/5-support_matrix.md
index 995106319..d0a00d0d8 100644
--- a/docs/sphinx/source/zh_CN/5-reference/5-support_matrix.md
+++ b/docs/sphinx/source/zh_CN/5-reference/5-support_matrix.md
@@ -92,7 +92,7 @@ uv run scripts/generate_support_matrix.py --write
 | PPO (mlx) | `go2w_joystick_flat` (go2w joystick flat) | Configured | Configured |
 | PPO (mlx) | `go2w_joystick_rough` (go2w joystick rough) | Configured | Configured |
 | PPO (mlx) | `stewart_balance` (stewart balance) | Configured | Configured |
-| APPO (torch) | `go1_joystick_flat` (Go1 joystick) | Tested | Registered |
+| APPO (torch) | `go1_joystick_flat` (Go1 joystick) | Tested | Tested |
 | APPO (torch) | `go2_joystick_flat` (Go2 joystick) | Tested | Tested |
 | APPO (torch) | `g1_walk_flat` (G1 walk flat) | Tested | Registered |
 | APPO (torch) | `g1_motion_tracking` (G1 motion tracking) | Tested | Tested |
diff --git a/src/unilab/envs/locomotion/common/rewards.py b/src/unilab/envs/locomotion/common/rewards.py
index c33cd7f5d..77e56df50 100644
--- a/src/unilab/envs/locomotion/common/rewards.py
+++ b/src/unilab/envs/locomotion/common/rewards.py
@@ -149,6 +149,14 @@ def action_rate(ctx: RewardContext) -> np.ndarray:
     return np.sum(np.square(current - last), axis=1)  # type: ignore[no-any-return]
 
 
+def action_smooth(ctx: RewardContext) -> np.ndarray:
+    """Penalty for second-order action changes between timesteps."""
+    current = ctx.info["current_actions"]
+    last = ctx.info["last_actions"]
+    previous = ctx.info.get("previous_actions", last)
+    return np.sum(np.square(current - 2.0 * last + previous), axis=1)  # type: ignore[no-any-return]
+
+
 # ── effort penalties ─────────────────────────────────────────────────
 
 
diff --git a/src/unilab/envs/locomotion/go1/joystick.py b/src/unilab/envs/locomotion/go1/joystick.py
index 8452b61fd..ed049cdb1 100644
--- a/src/unilab/envs/locomotion/go1/joystick.py
+++ b/src/unilab/envs/locomotion/go1/joystick.py
@@ -142,7 +142,9 @@ def _init_reward_functions(self):
             "ang_vel_xy": rewards.ang_vel_xy,
             "base_height": rewards.base_height,
             "action_rate": rewards.action_rate,
+            "action_smooth": rewards.action_smooth,
             "similar_to_default": rewards.similar_to_default,
+            "contact": self._reward_contact,
             "swing_feet_z": self._reward_swing_feet_z,
         }
 
@@ -239,3 +241,19 @@ def _reward_swing_feet_z(self, ctx: RewardContext) -> np.ndarray:
         swing_rew = np.exp(-height_error / 0.01) * is_swing
         reward: np.ndarray = np.sum(swing_rew, axis=1) / len(self._cfg.sensor.feet_pos)
         return reward
+
+    def apply_action(self, actions: np.ndarray, state: NpEnvState) -> np.ndarray:
+        previous_current = state.info.get("current_actions", np.zeros_like(actions))
+        previous_last = state.info.get("last_actions", np.zeros_like(actions))
+        state.info["previous_actions"] = previous_last
+        state.info["last_actions"] = previous_current
+        state.info["current_actions"] = actions
+        exec_actions = (
+            state.info["last_actions"]
+            if self._cfg.control_config.simulate_action_latency
+            else actions
+        )
+        ctrl: np.ndarray = (
+            exec_actions * self._cfg.control_config.action_scale + self.default_angles
+        )
+        return ctrl
diff --git a/tests/scripts/test_support_matrix.py b/tests/scripts/test_support_matrix.py
index 6a1f5464b..caa26308e 100644
--- a/tests/scripts/test_support_matrix.py
+++ b/tests/scripts/test_support_matrix.py
@@ -20,11 +20,11 @@ def test_support_matrix_marks_go2_ppo_backends_as_tested():
     assert row.cells["motrix"].level == EvidenceLevel.TESTED
 
 
-def test_support_matrix_marks_appo_go1_motrix_as_registered_only():
+def test_support_matrix_marks_appo_go1_backends_as_tested():
     row = _row("APPO (torch)", "go1_joystick_flat")
 
     assert row.cells["mujoco"].level == EvidenceLevel.TESTED
-    assert row.cells["motrix"].level == EvidenceLevel.REGISTERED
+    assert row.cells["motrix"].level == EvidenceLevel.TESTED
 
 
 def test_support_matrix_keeps_uncovered_mlx_tasks_at_configured():