diff --git a/metasim/sim/isaacgym/isaacgym.py b/metasim/sim/isaacgym/isaacgym.py index f75218d0b..571efe84d 100644 --- a/metasim/sim/isaacgym/isaacgym.py +++ b/metasim/sim/isaacgym/isaacgym.py @@ -422,9 +422,8 @@ def _load_robot_assets(self) -> None: asset_options.fix_base_link = self.robots[0].fix_base_link asset_options.disable_gravity = not self.robots[0].enabled_gravity asset_options.flip_visual_attachments = self.robots[0].isaacgym_flip_visual_attachments - asset_options.collapse_fixed_joints = self.robots[0].collapse_fixed_joints + asset_options.collapse_fixed_joints = getattr(self.robots[0], "collapse_fixed_joints", False) asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - # Defaults are set to free movement and will be updated based on the configuration in actuator_cfg below. asset_options.replace_cylinder_with_capsule = self.scenario.sim_params.replace_cylinder_with_capsule robot_asset = self.gym.load_asset(self.sim, asset_root, robot_asset_file, asset_options) # configure robot dofs @@ -761,7 +760,6 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]: camera_states = {} - self.refresh_render() self.gym.start_access_image_tensors(self.sim) for cam_id, cam in enumerate(self.cameras): @@ -864,12 +862,6 @@ def set_actions(self, actions: torch.Tensor) -> None: else: self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(action_input)) - def refresh_render(self) -> None: - # Step the physics - self.gym.simulate(self.sim) - self.gym.fetch_results(self.sim, True) - self._render() - def _simulate_one_physics_step(self): self.gym.simulate(self.sim) if self.device == "cpu": @@ -891,6 +883,8 @@ def _simulate(self) -> None: def _render(self) -> None: """Listen for keyboard events, step graphics and render the environment""" + if self.device != "cpu": + self.gym.fetch_results(self.sim, True) if not self.headless: for evt in self.gym.query_viewer_action_events(self.viewer): if evt.action == "toggle_viewer_sync" and evt.value > 0: diff --git a/metasim/sim/isaacsim/isaacsim.py b/metasim/sim/isaacsim/isaacsim.py index 30d607ad6..2ec4f85aa 100644 --- a/metasim/sim/isaacsim/isaacsim.py +++ b/metasim/sim/isaacsim/isaacsim.py @@ -886,7 +886,8 @@ def _load_terrain(self) -> None: except Exception as e: log.warning(f"Failed to download terrain material {mdl_path}: {e}") - num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding = 8 + num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding num_rows = num_cols # make each tile at least env_spacing (add a margin so robot never touches tile boundary) tile = 1.25 * self.scenario.env_spacing diff --git a/roboverse_learn/rl/configs/rsl_rl/ppo.py b/roboverse_learn/rl/configs/rsl_rl/ppo.py index dc9e2913b..092c7e7d5 100644 --- a/roboverse_learn/rl/configs/rsl_rl/ppo.py +++ b/roboverse_learn/rl/configs/rsl_rl/ppo.py @@ -100,7 +100,7 @@ def __post_init__(self) -> None: self.model_dir = os.path.join("outputs", name, self.task, log_dir) if self.obs_groups is None: - self.obs_groups = {"policy": ["policy"], "critic": ["policy", "critic"]} + self.obs_groups = {"policy": ["policy"], "critic": ["critic"]} # Build runner training config for RSL-RL policy_cfg = self.policy.to_dict() if hasattr(self.policy, "to_dict") else dict(self.policy.__dict__) @@ -129,5 +129,10 @@ def __post_init__(self) -> None: if self.clip_actions is not None: self.train_cfg["clip_actions"] = self.clip_actions + # Sync logger with use_wandb flag + if self.use_wandb: + self.logger = "wandb" + self.train_cfg["logger"] = "wandb" + __all__ = ["RslRlPPOConfig"] diff --git a/roboverse_learn/rl/rsl_rl/eval.py b/roboverse_learn/rl/rsl_rl/eval.py index 3d9a34402..6d018b23c 100644 --- a/roboverse_learn/rl/rsl_rl/eval.py +++ b/roboverse_learn/rl/rsl_rl/eval.py @@ -21,11 +21,11 @@ from roboverse_learn.rl.rsl_rl.env_wrapper import RslRlEnvWrapper from metasim.task.registry import get_task_class -def get_log_dir(robot_name: str, task_name: str, now=None) -> str: - """Get the log directory.""" +def get_log_dir(exp_name: str, task_name: str, now=None) -> str: + """Get the log directory (aligned with ppo.py saving logic).""" if now is None: - now = datetime.datetime.now().strftime("%Y_%m%d_%H%M%S") - log_dir = f"./outputs/{robot_name}/{task_name}/{now}" + now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_dir = f"./outputs/{exp_name}/{task_name}/{now}" if not os.path.exists(log_dir): os.makedirs(log_dir, exist_ok=True) log.info("Log directory: {}", log_dir) @@ -84,10 +84,12 @@ def evaluate(args: RslRlPPOConfig): raise ValueError("Please provide --resume (timestamp/log dir) for evaluation.") # Convert resume string to full log directory path + # Use exp_name to match ppo.py's saving logic (defaults to experiment_name -> task) + exp_name = args.exp_name or args.experiment_name or args.task log_dir = ( args.resume if os.path.isdir(args.resume) - else get_log_dir(robot_name=args.robot, task_name=args.task, now=args.resume) + else get_log_dir(exp_name=exp_name, task_name=args.task, now=args.resume) ) # Use get_load_path helper to handle checkpoint loading logic @@ -109,10 +111,7 @@ def evaluate(args: RslRlPPOConfig): obs = wrapped_env.get_observations() - # Resolve obs_groups (mimicking OnPolicyRunner.__init__) - default_sets = ["critic"] - args.obs_groups = resolve_obs_groups(obs, {}, default_sets) - obs_groups = args.obs_groups + obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["critic"]} # Extract policy config policy_cfg = args.policy diff --git a/roboverse_pack/robots/g1_cfg.py b/roboverse_pack/robots/g1_cfg.py index 31ef1a19b..241bea1b3 100644 --- a/roboverse_pack/robots/g1_cfg.py +++ b/roboverse_pack/robots/g1_cfg.py @@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg): enabled_self_collisions: bool = True isaacgym_read_mjcf = False isaacgym_flip_visual_attachments: bool = False - collapse_fixed_joints: bool = False + collapse_fixed_joints: bool = True actuators: dict[str, BaseActuatorCfg] = { # N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32) diff --git a/roboverse_pack/tasks/humanoid/base/base_legged_robot.py b/roboverse_pack/tasks/humanoid/base/base_legged_robot.py index 6c60f6456..8441690dd 100644 --- a/roboverse_pack/tasks/humanoid/base/base_legged_robot.py +++ b/roboverse_pack/tasks/humanoid/base/base_legged_robot.py @@ -353,11 +353,15 @@ def _post_physics_step(self, env_states: TensorState): # Compute "true" next observations BEFORE reset (for off-policy RL) # This is the observation that would be returned if the env didn't auto-reset true_obs_single, _ = self._compute_task_observations(env_states) - # Temporarily append to queue to compute obs with full history - self.obs_buf_queue.append(true_obs_single) - true_next_obs_with_history = self.obs_buf.clone() - # Remove the temporary observation - self.obs_buf_queue.pop() + # Compute history as if we appended without mutating the queue + if self.obs_buf_queue is None or len(self.obs_buf_queue) == 0: + true_next_obs_with_history = true_obs_single.clone() + else: + obs_queue = list(self.obs_buf_queue) + if self.obs_buf_queue.maxlen is not None and len(obs_queue) == self.obs_buf_queue.maxlen: + obs_queue = obs_queue[1:] + obs_queue.append(true_obs_single) + true_next_obs_with_history = torch.cat(obs_queue, dim=1) # reset envs reset_env_idx = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)