Skip to content
12 changes: 3 additions & 9 deletions metasim/sim/isaacgym/isaacgym.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,8 @@ def _load_robot_assets(self) -> None:
asset_options.fix_base_link = self.robots[0].fix_base_link
asset_options.disable_gravity = not self.robots[0].enabled_gravity
asset_options.flip_visual_attachments = self.robots[0].isaacgym_flip_visual_attachments
asset_options.collapse_fixed_joints = self.robots[0].collapse_fixed_joints
asset_options.collapse_fixed_joints = getattr(self.robots[0], "collapse_fixed_joints", False)
asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
# Defaults are set to free movement and will be updated based on the configuration in actuator_cfg below.
asset_options.replace_cylinder_with_capsule = self.scenario.sim_params.replace_cylinder_with_capsule
robot_asset = self.gym.load_asset(self.sim, asset_root, robot_asset_file, asset_options)
# configure robot dofs
Expand Down Expand Up @@ -761,7 +760,6 @@ def _get_states(self, env_ids: list[int] | None = None) -> list[DictEnvState]:

camera_states = {}

self.refresh_render()
self.gym.start_access_image_tensors(self.sim)

for cam_id, cam in enumerate(self.cameras):
Expand Down Expand Up @@ -864,12 +862,6 @@ def set_actions(self, actions: torch.Tensor) -> None:
else:
self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(action_input))

def refresh_render(self) -> None:
# Step the physics
self.gym.simulate(self.sim)
self.gym.fetch_results(self.sim, True)
self._render()

def _simulate_one_physics_step(self):
self.gym.simulate(self.sim)
if self.device == "cpu":
Expand All @@ -891,6 +883,8 @@ def _simulate(self) -> None:

def _render(self) -> None:
"""Listen for keyboard events, step graphics and render the environment"""
if self.device != "cpu":
self.gym.fetch_results(self.sim, True)
if not self.headless:
for evt in self.gym.query_viewer_action_events(self.viewer):
if evt.action == "toggle_viewer_sync" and evt.value > 0:
Expand Down
3 changes: 2 additions & 1 deletion metasim/sim/isaacsim/isaacsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,8 @@ def _load_terrain(self) -> None:
except Exception as e:
log.warning(f"Failed to download terrain material {mdl_path}: {e}")

num_cols = math.ceil(math.sqrt(self._num_envs))
ground_padding = 8
num_cols = math.ceil(math.sqrt(self._num_envs)) + ground_padding
num_rows = num_cols
# make each tile at least env_spacing (add a margin so robot never touches tile boundary)
tile = 1.25 * self.scenario.env_spacing
Expand Down
7 changes: 6 additions & 1 deletion roboverse_learn/rl/configs/rsl_rl/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def __post_init__(self) -> None:
self.model_dir = os.path.join("outputs", name, self.task, log_dir)

if self.obs_groups is None:
self.obs_groups = {"policy": ["policy"], "critic": ["policy", "critic"]}
self.obs_groups = {"policy": ["policy"], "critic": ["critic"]}

# Build runner training config for RSL-RL
policy_cfg = self.policy.to_dict() if hasattr(self.policy, "to_dict") else dict(self.policy.__dict__)
Expand Down Expand Up @@ -129,5 +129,10 @@ def __post_init__(self) -> None:
if self.clip_actions is not None:
self.train_cfg["clip_actions"] = self.clip_actions

# Sync logger with use_wandb flag
if self.use_wandb:
self.logger = "wandb"
self.train_cfg["logger"] = "wandb"


__all__ = ["RslRlPPOConfig"]
17 changes: 8 additions & 9 deletions roboverse_learn/rl/rsl_rl/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
from roboverse_learn.rl.rsl_rl.env_wrapper import RslRlEnvWrapper
from metasim.task.registry import get_task_class

def get_log_dir(robot_name: str, task_name: str, now=None) -> str:
"""Get the log directory."""
def get_log_dir(exp_name: str, task_name: str, now=None) -> str:
"""Get the log directory (aligned with ppo.py saving logic)."""
if now is None:
now = datetime.datetime.now().strftime("%Y_%m%d_%H%M%S")
log_dir = f"./outputs/{robot_name}/{task_name}/{now}"
now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_dir = f"./outputs/{exp_name}/{task_name}/{now}"
if not os.path.exists(log_dir):
os.makedirs(log_dir, exist_ok=True)
log.info("Log directory: {}", log_dir)
Expand Down Expand Up @@ -84,10 +84,12 @@ def evaluate(args: RslRlPPOConfig):
raise ValueError("Please provide --resume (timestamp/log dir) for evaluation.")

# Convert resume string to full log directory path
# Use exp_name to match ppo.py's saving logic (defaults to experiment_name -> task)
exp_name = args.exp_name or args.experiment_name or args.task
log_dir = (
args.resume
if os.path.isdir(args.resume)
else get_log_dir(robot_name=args.robot, task_name=args.task, now=args.resume)
else get_log_dir(exp_name=exp_name, task_name=args.task, now=args.resume)
)

# Use get_load_path helper to handle checkpoint loading logic
Expand All @@ -109,10 +111,7 @@ def evaluate(args: RslRlPPOConfig):

obs = wrapped_env.get_observations()

# Resolve obs_groups (mimicking OnPolicyRunner.__init__)
default_sets = ["critic"]
args.obs_groups = resolve_obs_groups(obs, {}, default_sets)
obs_groups = args.obs_groups
obs_groups = args.obs_groups or {"policy": ["policy"], "critic": ["critic"]}

# Extract policy config
policy_cfg = args.policy
Expand Down
2 changes: 1 addition & 1 deletion roboverse_pack/robots/g1_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class G1Dof12Cfg(RobotCfg):
enabled_self_collisions: bool = True
isaacgym_read_mjcf = False
isaacgym_flip_visual_attachments: bool = False
collapse_fixed_joints: bool = False
collapse_fixed_joints: bool = True

actuators: dict[str, BaseActuatorCfg] = {
# N7520-14.3: hip_pitch, hip_yaw (stiffness 100, damping 2, torque 88, vel 32)
Expand Down
14 changes: 9 additions & 5 deletions roboverse_pack/tasks/humanoid/base/base_legged_robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,11 +353,15 @@ def _post_physics_step(self, env_states: TensorState):
# Compute "true" next observations BEFORE reset (for off-policy RL)
# This is the observation that would be returned if the env didn't auto-reset
true_obs_single, _ = self._compute_task_observations(env_states)
# Temporarily append to queue to compute obs with full history
self.obs_buf_queue.append(true_obs_single)
true_next_obs_with_history = self.obs_buf.clone()
# Remove the temporary observation
self.obs_buf_queue.pop()
# Compute history as if we appended without mutating the queue
if self.obs_buf_queue is None or len(self.obs_buf_queue) == 0:
true_next_obs_with_history = true_obs_single.clone()
else:
obs_queue = list(self.obs_buf_queue)
if self.obs_buf_queue.maxlen is not None and len(obs_queue) == self.obs_buf_queue.maxlen:
obs_queue = obs_queue[1:]
obs_queue.append(true_obs_single)
true_next_obs_with_history = torch.cat(obs_queue, dim=1)

# reset envs
reset_env_idx = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
Expand Down