RoboVerseOrg · myuansun · Jan 15, 2026 · Jan 14, 2026 · Jan 15, 2026 · Jan 14, 2026
diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py
@@ -422,9 +422,8 @@ def _load_robot_assets(self) -> None:
         asset_options.fix_base_link = self.robots[0].fix_base_link
         asset_options.disable_gravity = not self.robots[0].enabled_gravity
         asset_options.flip_visual_attachments = self.robots[0].isaacgym_flip_visual_attachments
-        asset_options.collapse_fixed_joints = self.robots[0].collapse_fixed_joints
+        asset_options.collapse_fixed_joints = getattr(self.robots[0], "collapse_fixed_joints", False)
         asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
-        # Defaults are set to free movement and will be updated based on the configuration in actuator_cfg below.
         asset_options.replace_cylinder_with_capsule = self.scenario.sim_params.replace_cylinder_with_capsule
         robot_asset = self.gym.load_asset(self.sim, asset_root, robot_asset_file, asset_options)
         # configure robot dofs
@@ -761,7 +760,6 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]:
 
         camera_states = {}
 
-        self.refresh_render()
         self.gym.start_access_image_tensors(self.sim)
 
         for cam_id, cam in enumerate(self.cameras):
@@ -864,12 +862,6 @@ def set_actions(self, actions: torch.Tensor) -> None:
         else:
             self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(action_input))
 
-    def refresh_render(self) -> None:
-        # Step the physics
-        self.gym.simulate(self.sim)
-        self.gym.fetch_results(self.sim, True)
-        self._render()
-
     def _simulate_one_physics_step(self):
         self.gym.simulate(self.sim)
         if self.device == "cpu":
@@ -891,6 +883,8 @@ def _simulate(self) -> None:
 
     def _render(self) -> None:
         """Listen for keyboard events, step graphics and render the environment"""
+        if self.device != "cpu":
+            self.gym.fetch_results(self.sim, True)
         if not self.headless:
             for evt in self.gym.query_viewer_action_events(self.viewer):
                 if evt.action == "toggle_viewer_sync" and evt.value > 0:

diff --git a/metasim/sim/isaacsim/isaacsim.py b/metasim/sim/isaacsim/isaacsim.py
@@ -886,7 +886,8 @@ def _load_terrain(self) -> None:
             except Exception as e:
                 log.warning(f"Failed to download terrain material {mdl_path}: {e}")
 
-        num_cols = math.ceil(math.sqrt(self._num_envs))
+        ground_padding = 8
+        num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding
         num_rows = num_cols
         # make each tile at least env_spacing (add a margin so robot never touches tile boundary)
         tile = 1.25 * self.scenario.env_spacing

diff --git a/roboverse_learn/rl/configs/rsl_rl/ppo.py b/roboverse_learn/rl/configs/rsl_rl/ppo.py
@@ -100,7 +100,7 @@ def __post_init__(self) -> None:
             self.model_dir = os.path.join("outputs", name, self.task, log_dir)
 
         if self.obs_groups is None:
-            self.obs_groups = {"policy": ["policy"], "critic": ["policy", "critic"]}
+            self.obs_groups = {"policy": ["policy"], "critic": ["critic"]}
 
         # Build runner training config for RSL-RL
         policy_cfg = self.policy.to_dict() if hasattr(self.policy, "to_dict") else dict(self.policy.__dict__)
@@ -129,5 +129,10 @@ def __post_init__(self) -> None:
         if self.clip_actions is not None:
             self.train_cfg["clip_actions"] = self.clip_actions
 
+        # Sync logger with use_wandb flag
+        if self.use_wandb:
+            self.logger = "wandb"
+            self.train_cfg["logger"] = "wandb"
+
 
 __all__ = ["RslRlPPOConfig"]
diff --git a/roboverse_learn/rl/rsl_rl/eval.py b/roboverse_learn/rl/rsl_rl/eval.py
@@ -21,11 +21,11 @@
 from roboverse_learn.rl.rsl_rl.env_wrapper import RslRlEnvWrapper
 from metasim.task.registry import get_task_class
 
-def get_log_dir(robot_name: str, task_name: str, now=None) -> str:
-    """Get the log directory."""
+def get_log_dir(exp_name: str, task_name: str, now=None) -> str:
+    """Get the log directory (aligned with ppo.py saving logic)."""
     if now is None:
-        now = datetime.datetime.now().strftime("%Y_%m%d_%H%M%S")
-    log_dir = f"./outputs/{robot_name}/{task_name}/{now}"
+        now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    log_dir = f"./outputs/{exp_name}/{task_name}/{now}"
     if not os.path.exists(log_dir):
         os.makedirs(log_dir, exist_ok=True)
     log.info("Log directory: {}", log_dir)
@@ -84,10 +84,12 @@ def evaluate(args: RslRlPPOConfig):
         raise ValueError("Please provide --resume (timestamp/log dir) for evaluation.")
 
     # Convert resume string to full log directory path
+    # Use exp_name to match ppo.py's saving logic (defaults to experiment_name -> task)
+    exp_name = args.exp_name or args.experiment_name or args.task
     log_dir = (
         args.resume
         if os.path.isdir(args.resume)
-        else get_log_dir(robot_name=args.robot, task_name=args.task, now=args.resume)
+        else get_log_dir(exp_name=exp_name, task_name=args.task, now=args.resume)
     )
 
     # Use get_load_path helper to handle checkpoint loading logic
@@ -109,10 +111,7 @@ def evaluate(args: RslRlPPOConfig):
 
     obs = wrapped_env.get_observations()
 
-    # Resolve obs_groups (mimicking OnPolicyRunner.__init__)
-    default_sets = ["critic"]
-    args.obs_groups = resolve_obs_groups(obs, {}, default_sets)
-    obs_groups = args.obs_groups
+    obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["critic"]}
 
     # Extract policy config
     policy_cfg = args.policy

diff --git a/roboverse_pack/robots/g1_cfg.py b/roboverse_pack/robots/g1_cfg.py
@@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg):
     enabled_self_collisions: bool = True
     isaacgym_read_mjcf = False
     isaacgym_flip_visual_attachments: bool = False
-    collapse_fixed_joints: bool = False
+    collapse_fixed_joints: bool = True
 
     actuators: dict[str, BaseActuatorCfg] = {
         # N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32)

diff --git a/roboverse_pack/tasks/humanoid/base/base_legged_robot.py b/roboverse_pack/tasks/humanoid/base/base_legged_robot.py
@@ -353,11 +353,15 @@ def _post_physics_step(self, env_states: TensorState):
         # Compute "true" next observations BEFORE reset (for off-policy RL)
         # This is the observation that would be returned if the env didn't auto-reset
         true_obs_single, _ = self._compute_task_observations(env_states)
-        # Temporarily append to queue to compute obs with full history
-        self.obs_buf_queue.append(true_obs_single)
-        true_next_obs_with_history = self.obs_buf.clone()
-        # Remove the temporary observation
-        self.obs_buf_queue.pop()
+        # Compute history as if we appended without mutating the queue
+        if self.obs_buf_queue is None or len(self.obs_buf_queue) == 0:
+            true_next_obs_with_history = true_obs_single.clone()
+        else:
+            obs_queue = list(self.obs_buf_queue)
+            if self.obs_buf_queue.maxlen is not None and len(obs_queue) == self.obs_buf_queue.maxlen:
+                obs_queue = obs_queue[1:]
+            obs_queue.append(true_obs_single)
+            true_next_obs_with_history = torch.cat(obs_queue, dim=1)
 
         # reset envs
         reset_env_idx = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)