diff --git a/README.md b/README.md index 02d615c..296d719 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,17 @@ Similar to basic legged-gym based codebase, the install pipeline is: python scripts/play.py --task=h1:jumpjack --num_envs=3 ``` + +### Sim2sim + +- go to `legged_gym/legged_gym` + + ``` + python scripts/sim2sim.py h1.yaml + ``` + + + ## Note Since our framework is quite intuitive and we *de-engineered* a lot of things, (i.e., we do not use certain engineering tricks that can be applied to specific tasks such as terrain curriculum) to showcase the framework's capability, we provide a clap-and-dance example here for the reward and MDP implementations, and encourage anyone to engineer the specific environments for better performance on their own applications. diff --git a/legged_gym/legged_gym/MUJOCO_LOG.TXT b/legged_gym/legged_gym/MUJOCO_LOG.TXT new file mode 100644 index 0000000..67adba8 --- /dev/null +++ b/legged_gym/legged_gym/MUJOCO_LOG.TXT @@ -0,0 +1,15 @@ +Tue Dec 10 21:45:18 2024 +WARNING: Nan, Inf or huge value in QACC at DOF 14. The simulation is unstable. Time = 0.0550. + +Tue Dec 10 21:46:55 2024 +WARNING: Nan, Inf or huge value in QACC at DOF 0. The simulation is unstable. Time = 0.0550. + +Tue Dec 10 21:47:52 2024 +WARNING: Nan, Inf or huge value in QACC at DOF 0. The simulation is unstable. Time = 0.0550. + +Tue Dec 10 21:49:46 2024 +WARNING: Nan, Inf or huge value in QACC at DOF 0. The simulation is unstable. Time = 0.0550. + +Tue Dec 10 21:50:16 2024 +WARNING: Nan, Inf or huge value in QACC at DOF 0. The simulation is unstable. Time = 0.0550. 
+ diff --git a/legged_gym/legged_gym/envs/base/h1_jumpjack.py b/legged_gym/legged_gym/envs/base/h1_jumpjack.py index b4f018d..219d082 100644 --- a/legged_gym/legged_gym/envs/base/h1_jumpjack.py +++ b/legged_gym/legged_gym/envs/base/h1_jumpjack.py @@ -157,6 +157,13 @@ def compute_observations(self): """ #todo: deal with 4 ee self.phase = self.contact_sequence[torch.arange(self.num_envs), :, self.current_contact_goal[:,0]].float() + print('self.contact_sequence:', self.contact_sequence) + print('self.contact_sequence.shape:', self.contact_sequence.shape) + print('self.current_contact_goal[:,0]:',self.current_contact_goal[:,0]) + print('self.phase:', self.phase) + + # print('self.dof_bias', self.dof_bias) + self.obs_buf = torch.cat(( (self.dof_pos - self.dof_bias) * self.obs_scales.dof_pos, # 19 self.dof_vel * self.obs_scales.dof_vel, # 19 @@ -169,8 +176,13 @@ def compute_observations(self): self.joint_hist[:,1,:], # 57 self.joint_hist[:,2,:], # 57 ),dim=-1) + + obs_buf_denoise = self.obs_buf.clone() + + # print('h1_jumpjack self.obs_buf.shape', self.obs_buf.shape) + # print('h1_jumpjack self.base_lin_vel * self.obs_scales.lin_vel', self.base_lin_vel * self.obs_scales.lin_vel) # add noise if needed if self.add_noise: diff --git a/legged_gym/legged_gym/envs/base/legged_robot.py b/legged_gym/legged_gym/envs/base/legged_robot.py index 557f938..4c5a859 100644 --- a/legged_gym/legged_gym/envs/base/legged_robot.py +++ b/legged_gym/legged_gym/envs/base/legged_robot.py @@ -789,6 +789,7 @@ def _init_buffers(self): if self.cfg.control.control_type in ["P", "V"]: print(f"PD gain of joint {name} were not defined, setting them to zero") self.default_dof_pos = self.default_dof_pos.unsqueeze(0) + print('Default joint angles:', self.default_dof_pos) self.forward_vec = to_torch([1., 0., 0.], device=self.device).repeat((self.num_envs, 1)) self.dof_bias = torch.zeros(self.num_envs, self.num_dof, device=self.device) @@ -1007,6 +1008,10 @@ def _get_env_origins(self): def _parse_cfg(self, 
cfg): self.dt = self.cfg.control.decimation * self.sim_params.dt + print('Decimation:', self.cfg.control.decimation) + print('Simulation dt:', self.sim_params.dt) + print('Control dt:', self.dt) + self.obs_scales = self.cfg.normalization.obs_scales self.reward_scales = class_to_dict(self.cfg.rewards.scales) self.command_ranges = class_to_dict(self.cfg.commands.ranges) diff --git a/legged_gym/legged_gym/scripts/configs/h1.yaml b/legged_gym/legged_gym/scripts/configs/h1.yaml new file mode 100644 index 0000000..3639f45 --- /dev/null +++ b/legged_gym/legged_gym/scripts/configs/h1.yaml @@ -0,0 +1,33 @@ +# +policy_path: "{LEGGED_GYM_ROOT_DIR}/logs/h1:jjack/exported/policies/24_12_09_12-01-45_model_25800.pt" +xml_path: "{LEGGED_GYM_ROOT_DIR}/resources/robots/h1/xml/scene.xml" +# Total simulation time +simulation_duration: 60.0 +# Simulation time step +simulation_dt: 0.005 +# Controller update frequency (meets the requirement of simulation_dt * control_decimation=0.02; 50Hz) +control_decimation: 4 + +kps: [200, 200, 200, 300, 40, 200, 200, 200, 300, 40,300,100,100,100,100,100,100,100,100] +kds: [5, 5, 5, 6, 2, 5, 5, 5, 6, 2,6,2,2,2,2,2,2,2,2] + +default_angles: [0, 0.0, -0.4 , 0.8, -0.4, + 0, 0.0, -0.4, 0.8, -0.4, + 0, + 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0] + +ang_vel_scale: 1.0 +lin_vel_scale: 1.0 +dof_pos_scale: 1.0 +dof_vel_scale: 1.0 +action_scale: 0.25 +num_actions: 19 +num_obs: 187 + +joint_hist_len: 5 +clip_observations: 100 +clip_actions: 100 + +########## + diff --git a/legged_gym/legged_gym/scripts/play.py b/legged_gym/legged_gym/scripts/play.py index 7538248..d75665b 100644 --- a/legged_gym/legged_gym/scripts/play.py +++ b/legged_gym/legged_gym/scripts/play.py @@ -80,6 +80,7 @@ def play(args): [0, 1,0,0], [0, 0,0,1]] env.contact_sequence[0,:,:] = torch.tensor(dance_steps).to(env.device).repeat(1,25) + print('Dance sequence:', env.contact_sequence[0]) env.period_contact[0] = 0.4 logger = Logger(env.dt) @@ -133,6 +134,7 @@ def on_press(key): # actions[:, 
15:16] += 10. # print(actions) obs, rews, dones, infos = env.step(actions.detach()) + # print('obs.shape', obs.shape) if groupdance: obs[1:,47:51]= obs[0,47:51].clone().unsqueeze(0) left = obs[1:4,[47,49]] diff --git a/legged_gym/legged_gym/scripts/sim2sim.py b/legged_gym/legged_gym/scripts/sim2sim.py new file mode 100644 index 0000000..3e3ee74 --- /dev/null +++ b/legged_gym/legged_gym/scripts/sim2sim.py @@ -0,0 +1,215 @@ +# import time + +# import mujoco.viewer +# import mujoco +# import numpy as np +# from legged_gym import LEGGED_GYM_ROOT_DIR +# import torch +# import yaml + +import sys +# from legged_gym import LEGGED_GYM_ROOT_DIR +import os + +from legged_gym.envs import * +# from legged_gym.utils import get_args, export_policy_as_jit, task_registry, Logger, diff_quat + +import numpy as np +import torch +# from termcolor import colored + +import time + +import mujoco.viewer +import mujoco +import yaml +from legged_gym.envs.base.lpf import ActionFilterButterTorch + +def get_gravity_orientation(quaternion): + qw = quaternion[0] + qx = quaternion[1] + qy = quaternion[2] + qz = quaternion[3] + + gravity_orientation = np.zeros(3) + + gravity_orientation[0] = 2 * (-qz * qx + qw * qy) + gravity_orientation[1] = -2 * (qz * qy + qw * qx) + gravity_orientation[2] = 1 - 2 * (qw * qw + qz * qz) + + return gravity_orientation + + + +def pd_control(target_q, q, kp, target_dq, dq, kd): + """Calculates torques from position commands""" + return (target_q - q) * kp + (target_dq - dq) * kd + + +def global_obs(base_pos,base_quat): + # x y yaw + # xy = base_pos[:,:2] - env_origins[:,:2] + xy = base_pos[:2] + qw, qx, qy, qz, = base_quat[ 0], base_quat[1], base_quat[2], base_quat[3] + yaw = np.arctan2(2 * (qw * qz + qx * qy), 1 - 2 * (qy * qy + qz * qz)) + yaw = np.array([yaw]) + return np.concatenate([xy, yaw], axis=-1) +if __name__ == "__main__": + # get config file name from command line + import argparse + + parser = argparse.ArgumentParser() + 
parser.add_argument("config_file", type=str, help="config file name in the config folder") + args = parser.parse_args() + config_file = args.config_file + with open(f"{LEGGED_GYM_ROOT_DIR}/legged_gym/scripts/configs/{config_file}", "r") as f: + config = yaml.load(f, Loader=yaml.FullLoader) + print('config:',config) + policy_path = config["policy_path"].replace("{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR) + xml_path = config["xml_path"].replace("{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR) + + simulation_duration = config["simulation_duration"] + simulation_dt = config["simulation_dt"] + control_decimation = config["control_decimation"] + + kps = np.array(config["kps"], dtype=np.float32) + kds = np.array(config["kds"], dtype=np.float32) + + default_angles = np.array(config["default_angles"], dtype=np.float32) + + ang_vel_scale = config["ang_vel_scale"] + lin_vel_scale = config["lin_vel_scale"] + + dof_pos_scale = config["dof_pos_scale"] + dof_vel_scale = config["dof_vel_scale"] + action_scale = config["action_scale"] + clip_observations = config["clip_observations"] + clip_actions = config["clip_actions"] + + num_actions = config["num_actions"] + num_obs = config["num_obs"] + + joint_hist_len = config["joint_hist_len"] + + # define context variables + action = np.zeros(num_actions, dtype=np.float32) + + target_dof_pos = default_angles.copy() + obs = np.zeros(num_obs, dtype=np.float32) + # projected_gravity = torch.tensor([0,0,-1],dtype=torch.float) + + counter = 0 + + # Load robot model + m = mujoco.MjModel.from_xml_path(xml_path) + d = mujoco.MjData(m) + m.opt.timestep = simulation_dt + + # load policy + policy = torch.jit.load(policy_path) + + dof_bias = np.zeros(num_actions, dtype=np.float32) + # q, dq, qtgt hist reset + qj_start = d.qpos[7:] + + joint_hist = np.zeros((joint_hist_len, num_actions * 3), dtype=np.float32) + + joint_hist[:, 0:num_actions] = (qj_start-dof_bias) + joint_hist[:, num_actions:2*num_actions] = 0. 
+ joint_hist[:, 2*num_actions:3*num_actions] = \ + (qj_start - default_angles -dof_bias) / action_scale + + print('joint_hist:',joint_hist) + + # init low pass filter + action_filt = True + action_cutfreq = 4.0 + torque_effort_scale = 1.0 + ctrl_dt = simulation_dt * control_decimation + print('ctrl_dt:',ctrl_dt) + + if action_filt: + action_filter = ActionFilterButterTorch(lowcut=np.zeros(num_actions), + highcut=np.ones(num_actions) * action_cutfreq, + sampling_rate=1./ctrl_dt, num_joints= num_actions, + device="cpu") + with mujoco.viewer.launch_passive(m, d) as viewer: + # Close the viewer automatically after simulation_duration wall-seconds. + start = time.time() + viewer.cam.lookat[0] = 0 + viewer.cam.lookat[1] = 0 + viewer.cam.lookat[2] = 1 + viewer.cam.distance = 8 + viewer.cam.elevation = -20 + viewer.cam.azimuth = 90 + while viewer.is_running() and time.time() - start < simulation_duration: + step_start = time.time() + tau = pd_control(target_dof_pos, d.qpos[7:], kps, np.zeros_like(kds), d.qvel[6:], kds) + d.ctrl[:] = tau + # mj_step can be replaced with code that also evaluates + # a policy and applies a control signal before stepping the physics. + mujoco.mj_step(m, d) + + counter += 1 + if counter % control_decimation == 0: + # Apply control signal here. 
+ + # create observation + qj = d.qpos[7:] + dqj = d.qvel[6:] + quat = d.qpos[3:7] + omega = d.qvel[3:6] + lin_vel = d.qvel[0:3] + base_pos = d.qpos[0:3] + + + gravity_orientation = get_gravity_orientation(quat) + omega = omega * ang_vel_scale + lin_vel = lin_vel * lin_vel_scale + + count = counter * simulation_dt + phase = np.array([0., 1., 0., 1.]) + # phase = np.array([0., 0., 0., 0.]) + + # q, dq, joint action hist update + joint_hist[1:, :] = joint_hist[:-1, :].copy() + joint_hist[0, 0:num_actions] = qj.copy() - dof_bias + joint_hist[0, num_actions:2*num_actions] = dqj.copy() + joint_hist[0, 2*num_actions:3*num_actions] = action.copy() + + print('joint_hist:',joint_hist) + print('global_obs(base_pos,quat):',global_obs(base_pos,quat)) + + obs[:19] = (qj - dof_bias) * dof_pos_scale + obs[19:38] = dqj * dof_vel_scale + obs[38:41] = omega + obs[41:44] = lin_vel + obs[44:47] = gravity_orientation + obs[47:51] = phase + obs[51:54] = global_obs(base_pos,quat) + obs[54:73] = action + obs[73:130] = joint_hist[1,:] + obs[130:187] = joint_hist[2,:] + obs = np.clip(obs, -clip_observations, clip_observations) + + obs_tensor = torch.from_numpy(obs).unsqueeze(0) + # print('obs_tensor:',obs_tensor) + # policy inference + action = policy(obs_tensor).detach().numpy().squeeze() + action = np.clip(action, -clip_actions, clip_actions) + actions_filter = action.copy() + actions_filter = torch.tensor(actions_filter) + if action_filt: + actions_filter = action_filter.filter(actions_filter.reshape(num_actions)).reshape(1, num_actions) + + # transform action to target_dof_pos + target_dof_pos = actions_filter * action_scale + default_angles + print('target_dof_pos:',target_dof_pos) + # Pick up changes to the physics state, apply perturbations, update options from GUI. + viewer.sync() + + # Rudimentary time keeping, will drift relative to wall clock. 
+ time_until_next_step = m.opt.timestep - (time.time() - step_start) + if time_until_next_step > 0: + time.sleep(time_until_next_step) + diff --git a/legged_gym/legged_gym/utils/task_registry.py b/legged_gym/legged_gym/utils/task_registry.py index 1e700a5..919a49d 100644 --- a/legged_gym/legged_gym/utils/task_registry.py +++ b/legged_gym/legged_gym/utils/task_registry.py @@ -66,6 +66,7 @@ def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCf # parse sim params (convert to dict first) sim_params = {"sim": class_to_dict(env_cfg.sim)} sim_params = parse_sim_params(args, sim_params) + args.headless =False env = task_class( cfg=env_cfg, sim_params=sim_params, physics_engine=args.physics_engine, diff --git a/legged_gym/logs/h1:jjack/exported/policies/24_12_09_12-01-45_model_25800.pt b/legged_gym/logs/h1:jjack/exported/policies/24_12_09_12-01-45_model_25800.pt new file mode 100644 index 0000000..0c02a3f Binary files /dev/null and b/legged_gym/logs/h1:jjack/exported/policies/24_12_09_12-01-45_model_25800.pt differ diff --git a/legged_gym/resources/robots/h1/xml/h1.xml b/legged_gym/resources/robots/h1/xml/h1.xml index 4ffa0af..719fe54 100644 --- a/legged_gym/resources/robots/h1/xml/h1.xml +++ b/legged_gym/resources/robots/h1/xml/h1.xml @@ -1,5 +1,5 @@ - + diff --git a/legged_gym/resources/robots/h1/xml/h1_stage.xml b/legged_gym/resources/robots/h1/xml/h1_stage.xml new file mode 100644 index 0000000..2a220da --- /dev/null +++ b/legged_gym/resources/robots/h1/xml/h1_stage.xml @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/legged_gym/resources/robots/h1/xml/scene.xml b/legged_gym/resources/robots/h1/xml/scene.xml new file mode 100644 index 0000000..2241dde --- /dev/null +++ b/legged_gym/resources/robots/h1/xml/scene.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file