From 72447e6d9da13eab2660fd4edb5c367f6d66f2cf Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Wed, 14 Jan 2026 15:55:57 +0800 Subject: [PATCH 01/10] [fix] wandb logging --- roboverse_learn/rl/configs/rsl_rl/ppo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/roboverse_learn/rl/configs/rsl_rl/ppo.py b/roboverse_learn/rl/configs/rsl_rl/ppo.py index dc9e2913b..a0ecb4b2b 100644 --- a/roboverse_learn/rl/configs/rsl_rl/ppo.py +++ b/roboverse_learn/rl/configs/rsl_rl/ppo.py @@ -129,5 +129,10 @@ def __post_init__(self) -> None: if self.clip_actions is not None: self.train_cfg["clip_actions"] = self.clip_actions + # Sync logger with use_wandb flag + if self.use_wandb: + self.logger = "wandb" + self.train_cfg["logger"] = "wandb" + __all__ = ["RslRlPPOConfig"] From a435c4dcd5eb8e774b729fd3960b41e39270821a Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 15:30:42 +0800 Subject: [PATCH 02/10] [fix] optimize contact sensor & fix g1_dof12 observation buffer --- .../tasks/humanoid/base/base_legged_robot.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/roboverse_pack/tasks/humanoid/base/base_legged_robot.py b/roboverse_pack/tasks/humanoid/base/base_legged_robot.py index 6c60f6456..8441690dd 100644 --- a/roboverse_pack/tasks/humanoid/base/base_legged_robot.py +++ b/roboverse_pack/tasks/humanoid/base/base_legged_robot.py @@ -353,11 +353,15 @@ def _post_physics_step(self, env_states: TensorState): # Compute "true" next observations BEFORE reset (for off-policy RL) # This is the observation that would be returned if the env didn't auto-reset true_obs_single, _ = self._compute_task_observations(env_states) - # Temporarily append to queue to compute obs with full history - self.obs_buf_queue.append(true_obs_single) - true_next_obs_with_history = self.obs_buf.clone() - # Remove the temporary observation - self.obs_buf_queue.pop() + # Compute history as if we appended without mutating the queue + if self.obs_buf_queue is None or len(self.obs_buf_queue) == 0: + true_next_obs_with_history = true_obs_single.clone() + else: + obs_queue = list(self.obs_buf_queue) + if self.obs_buf_queue.maxlen is not None and len(obs_queue) == self.obs_buf_queue.maxlen: + obs_queue = obs_queue[1:] + obs_queue.append(true_obs_single) + true_next_obs_with_history = torch.cat(obs_queue, dim=1) # reset envs reset_env_idx = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) From 716ef1d6b6aa621bf44a7e0f58c952f1a8262fcb Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Wed, 14 Jan 2026 23:34:55 +0800 Subject: [PATCH 03/10] [fix] fix isaacgym CUDA error --- roboverse_pack/robots/g1_cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roboverse_pack/robots/g1_cfg.py b/roboverse_pack/robots/g1_cfg.py index 31ef1a19b..9ac912301 100644 --- a/roboverse_pack/robots/g1_cfg.py +++ b/roboverse_pack/robots/g1_cfg.py @@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg): enabled_self_collisions: bool = True isaacgym_read_mjcf = False isaacgym_flip_visual_attachments: bool = False - collapse_fixed_joints: bool = False + collapse_fixed_joints: bool = True # old_work: True (reduces rigid bodies -> fewer contacts) actuators: dict[str, BaseActuatorCfg] = { # N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32) From 83f4024896f41b882a87263fc5983ffb9c7076af Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 13:03:25 +0800 Subject: [PATCH 04/10] [update] only set collapse_fixed_joints to True for isaacgym --- metasim/sim/isaacgym/isaacgym.py | 4 +++- roboverse_pack/robots/g1_cfg.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py index f75218d0b..3240790ec 100644 --- a/metasim/sim/isaacgym/isaacgym.py +++ b/metasim/sim/isaacgym/isaacgym.py @@ -422,7 +422,9 @@ def _load_robot_assets(self) -> None: asset_options.fix_base_link = self.robots[0].fix_base_link asset_options.disable_gravity = not self.robots[0].enabled_gravity asset_options.flip_visual_attachments = self.robots[0].isaacgym_flip_visual_attachments - asset_options.collapse_fixed_joints = self.robots[0].collapse_fixed_joints + # NOTE: Always collapse fixed joints in IsaacGym to reduce rigid body count. + # This prevents PhysX contact buffer overflow at scale (for g1_dof29 over 2048+ envs). + asset_options.collapse_fixed_joints = True asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE # Defaults are set to free movement and will be updated based on the configuration in actuator_cfg below. asset_options.replace_cylinder_with_capsule = self.scenario.sim_params.replace_cylinder_with_capsule diff --git a/roboverse_pack/robots/g1_cfg.py b/roboverse_pack/robots/g1_cfg.py index 9ac912301..31ef1a19b 100644 --- a/roboverse_pack/robots/g1_cfg.py +++ b/roboverse_pack/robots/g1_cfg.py @@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg): enabled_self_collisions: bool = True isaacgym_read_mjcf = False isaacgym_flip_visual_attachments: bool = False - collapse_fixed_joints: bool = True # old_work: True (reduces rigid bodies -> fewer contacts) + collapse_fixed_joints: bool = False actuators: dict[str, BaseActuatorCfg] = { # N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32) From 3808db7f5415554dfff1ab08c5b19d26f3e9a116 Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 13:54:43 +0800 Subject: [PATCH 05/10] [fix] align checkpoint directory of eval.py --- roboverse_learn/rl/rsl_rl/eval.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/roboverse_learn/rl/rsl_rl/eval.py b/roboverse_learn/rl/rsl_rl/eval.py index 3d9a34402..5baaf55f6 100644 --- a/roboverse_learn/rl/rsl_rl/eval.py +++ b/roboverse_learn/rl/rsl_rl/eval.py @@ -21,11 +21,11 @@ from roboverse_learn.rl.rsl_rl.env_wrapper import RslRlEnvWrapper from metasim.task.registry import get_task_class -def get_log_dir(robot_name: str, task_name: str, now=None) -> str: - """Get the log directory.""" +def get_log_dir(exp_name: str, task_name: str, now=None) -> str: + """Get the log directory (aligned with ppo.py saving logic).""" if now is None: - now = datetime.datetime.now().strftime("%Y_%m%d_%H%M%S") - log_dir = f"./outputs/{robot_name}/{task_name}/{now}" + now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_dir = f"./outputs/{exp_name}/{task_name}/{now}" if not os.path.exists(log_dir): os.makedirs(log_dir, exist_ok=True) log.info("Log directory: {}", log_dir) @@ -84,10 +84,12 @@ def evaluate(args: RslRlPPOConfig): raise ValueError("Please provide --resume (timestamp/log dir) for evaluation.") # Convert resume string to full log directory path + # Use exp_name to match ppo.py's saving logic (defaults to experiment_name -> task) + exp_name = args.exp_name or args.experiment_name or args.task log_dir = ( args.resume if os.path.isdir(args.resume) - else get_log_dir(robot_name=args.robot, task_name=args.task, now=args.resume) + else get_log_dir(exp_name=exp_name, task_name=args.task, now=args.resume) ) # Use get_load_path helper to handle checkpoint loading logic @@ -109,10 +111,9 @@ def evaluate(args: RslRlPPOConfig): obs = wrapped_env.get_observations() - # Resolve obs_groups (mimicking OnPolicyRunner.__init__) - default_sets = ["critic"] - args.obs_groups = resolve_obs_groups(obs, {}, default_sets) - obs_groups = args.obs_groups + # Use the same obs_groups as training (from RslRlPPOConfig.__post_init__) + # critic gets both policy and critic observations concatenated + obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["policy", "critic"]} # Extract policy config policy_cfg = args.policy From bc5e6f0ecf8c8468440cac5055a804cdec9c55a9 Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 14:48:08 +0800 Subject: [PATCH 06/10] [fix] obs_groups & collapse fixed joints for g1 --- metasim/sim/isaacgym/isaacgym.py | 5 +---- roboverse_learn/rl/configs/rsl_rl/ppo.py | 2 +- roboverse_pack/robots/g1_cfg.py | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py index 3240790ec..d700efc1b 100644 --- a/metasim/sim/isaacgym/isaacgym.py +++ b/metasim/sim/isaacgym/isaacgym.py @@ -422,11 +422,8 @@ def _load_robot_assets(self) -> None: asset_options.fix_base_link = self.robots[0].fix_base_link asset_options.disable_gravity = not self.robots[0].enabled_gravity asset_options.flip_visual_attachments = self.robots[0].isaacgym_flip_visual_attachments - # NOTE: Always collapse fixed joints in IsaacGym to reduce rigid body count. - # This prevents PhysX contact buffer overflow at scale (for g1_dof29 over 2048+ envs). - asset_options.collapse_fixed_joints = True + asset_options.collapse_fixed_joints = getattr(self.robots[0], "collapse_fixed_joints", False) asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - # Defaults are set to free movement and will be updated based on the configuration in actuator_cfg below. asset_options.replace_cylinder_with_capsule = self.scenario.sim_params.replace_cylinder_with_capsule robot_asset = self.gym.load_asset(self.sim, asset_root, robot_asset_file, asset_options) # configure robot dofs diff --git a/roboverse_learn/rl/configs/rsl_rl/ppo.py b/roboverse_learn/rl/configs/rsl_rl/ppo.py index a0ecb4b2b..092c7e7d5 100644 --- a/roboverse_learn/rl/configs/rsl_rl/ppo.py +++ b/roboverse_learn/rl/configs/rsl_rl/ppo.py @@ -100,7 +100,7 @@ def __post_init__(self) -> None: self.model_dir = os.path.join("outputs", name, self.task, log_dir) if self.obs_groups is None: - self.obs_groups = {"policy": ["policy"], "critic": ["policy", "critic"]} + self.obs_groups = {"policy": ["policy"], "critic": ["critic"]} # Build runner training config for RSL-RL policy_cfg = self.policy.to_dict() if hasattr(self.policy, "to_dict") else dict(self.policy.__dict__) diff --git a/roboverse_pack/robots/g1_cfg.py b/roboverse_pack/robots/g1_cfg.py index 31ef1a19b..241bea1b3 100644 --- a/roboverse_pack/robots/g1_cfg.py +++ b/roboverse_pack/robots/g1_cfg.py @@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg): enabled_self_collisions: bool = True isaacgym_read_mjcf = False isaacgym_flip_visual_attachments: bool = False - collapse_fixed_joints: bool = False + collapse_fixed_joints: bool = True actuators: dict[str, BaseActuatorCfg] = { # N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32) From 05b27df68125435d19e0ef288886884595c5caac Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 16:09:09 +0800 Subject: [PATCH 07/10] [fix] isaacgym render with redundant physics step --- metasim/sim/isaacgym/isaacgym.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py index d700efc1b..571efe84d 100644 --- a/metasim/sim/isaacgym/isaacgym.py +++ b/metasim/sim/isaacgym/isaacgym.py @@ -760,7 +760,6 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]: camera_states = {} - self.refresh_render() self.gym.start_access_image_tensors(self.sim) for cam_id, cam in enumerate(self.cameras): @@ -863,12 +862,6 @@ def set_actions(self, actions: torch.Tensor) -> None: else: self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(action_input)) - def refresh_render(self) -> None: - # Step the physics - self.gym.simulate(self.sim) - self.gym.fetch_results(self.sim, True) - self._render() - def _simulate_one_physics_step(self): self.gym.simulate(self.sim) if self.device == "cpu": @@ -890,6 +883,8 @@ def _simulate(self) -> None: def _render(self) -> None: """Listen for keyboard events, step graphics and render the environment""" + if self.device != "cpu": + self.gym.fetch_results(self.sim, True) if not self.headless: for evt in self.gym.query_viewer_action_events(self.viewer): if evt.action == "toggle_viewer_sync" and evt.value > 0: From 3adbb01885e122a93c3fb0f3c99cd48ee691fcc1 Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 18:32:49 +0800 Subject: [PATCH 08/10] [fix] add ground padding for isaacsim --- metasim/sim/isaacsim/isaacsim.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metasim/sim/isaacsim/isaacsim.py b/metasim/sim/isaacsim/isaacsim.py index 30d607ad6..1d2d57a6d 100644 --- a/metasim/sim/isaacsim/isaacsim.py +++ b/metasim/sim/isaacsim/isaacsim.py @@ -886,7 +886,8 @@ def _load_terrain(self) -> None: except Exception as e: log.warning(f"Failed to download terrain material {mdl_path}: {e}") - num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding = 4 + num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding num_rows = num_cols # make each tile at least env_spacing (add a margin so robot never touches tile boundary) tile = 1.25 * self.scenario.env_spacing From 02e6ad51d6a80970158ed20884114d2059e1bfd7 Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 18:35:13 +0800 Subject: [PATCH 09/10] [update] increase ground padding of isaacsim --- metasim/sim/isaacsim/isaacsim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metasim/sim/isaacsim/isaacsim.py b/metasim/sim/isaacsim/isaacsim.py index 1d2d57a6d..2ec4f85aa 100644 --- a/metasim/sim/isaacsim/isaacsim.py +++ b/metasim/sim/isaacsim/isaacsim.py @@ -886,7 +886,7 @@ def _load_terrain(self) -> None: except Exception as e: log.warning(f"Failed to download terrain material {mdl_path}: {e}") - ground_padding = 4 + ground_padding = 8 num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding num_rows = num_cols # make each tile at least env_spacing (add a margin so robot never touches tile boundary) From f1944d1247e8dd60405be3e53ccbe711fdd0bb55 Mon Sep 17 00:00:00 2001 From: myuansun <1273414643@qq.com> Date: Thu, 15 Jan 2026 21:17:51 +0800 Subject: [PATCH 10/10] [update] change default obs_group in eval.py --- roboverse_learn/rl/rsl_rl/eval.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/roboverse_learn/rl/rsl_rl/eval.py b/roboverse_learn/rl/rsl_rl/eval.py index 5baaf55f6..6d018b23c 100644 --- a/roboverse_learn/rl/rsl_rl/eval.py +++ b/roboverse_learn/rl/rsl_rl/eval.py @@ -111,9 +111,7 @@ def evaluate(args: RslRlPPOConfig): obs = wrapped_env.get_observations() - # Use the same obs_groups as training (from RslRlPPOConfig.__post_init__) - # critic gets both policy and critic observations concatenated - obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["policy", "critic"]} + obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["critic"]} # Extract policy config policy_cfg = args.policy