Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pilla_rl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""pilla_rl — curriculum learning framework for Go2 quadruped robot tasks."""

__version__ = "0.1.0"
7 changes: 7 additions & 0 deletions pilla_rl/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Allow running the package as ``python -m pilla_rl.train`` or
``python -m pilla_rl`` (which defaults to the train entry point).
"""

from pilla_rl.train import main

main()
71 changes: 71 additions & 0 deletions pilla_rl/config_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""YAML configuration loader and environment factory.

Usage::

from pilla_rl.config_loader import load_task_config, instantiate_env

cfg = load_task_config("pilla_rl/configs/tasks/walk.yaml")
env = instantiate_env(cfg, num_envs=4096, show_viewer=False)

Dependencies: pyyaml (``pip install pyyaml``)
"""

import importlib
from pathlib import Path

import yaml


def load_task_config(config_path: str) -> dict:
    """Load a YAML task config file and return it as a plain dict.

    Parameters
    ----------
    config_path:
        Path to the ``.yaml`` file, either absolute or relative to the
        current working directory.

    Returns
    -------
    dict
        The parsed configuration.

    Raises
    ------
    FileNotFoundError
        If *config_path* does not exist.
    yaml.YAMLError
        If the file is not valid YAML.
    """
    # Open with explicit UTF-8: YAML files default to UTF-8, while the
    # platform locale encoding (the default for open()) varies and would
    # break configs containing non-ASCII comments on e.g. Windows.
    with Path(config_path).open("r", encoding="utf-8") as fh:
        return yaml.safe_load(fh)


def instantiate_env(config: dict, num_envs: int = 4096, show_viewer: bool = False):
    """Resolve the env class named in *config* and build an instance of it.

    ``config["env_class"]`` must be a fully-qualified dotted class name,
    e.g. ``"pilla_rl.envs.walk_env.WalkEnv"``; the containing module is
    imported dynamically and the class is called with the per-section
    config dicts.

    Parameters
    ----------
    config:
        Parsed task config dict (as returned by :func:`load_task_config`).
    num_envs:
        Number of parallel simulation environments.
    show_viewer:
        Whether to open the Genesis viewer window.

    Returns
    -------
    BaseQuadrupedEnv
        An instantiated environment object.
    """
    dotted_name: str = config["env_class"]
    # Split "pkg.module.Class" into its module path and class attribute.
    module_name, attr_name = dotted_name.rsplit(".", 1)
    env_cls = getattr(importlib.import_module(module_name), attr_name)

    kwargs = {
        "num_envs": num_envs,
        "env_cfg": config["env_cfg"],
        "obs_cfg": config["obs_cfg"],
        "reward_cfg": config["reward_cfg"],
        "command_cfg": config["command_cfg"],
        "show_viewer": show_viewer,
    }
    return env_cls(**kwargs)
31 changes: 31 additions & 0 deletions pilla_rl/configs/curricula/recovery_to_walk.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Curriculum: recovery → walk
#
# Example multi-stage curriculum that chains two task configs.
# Each stage can override reward scales and command ranges, and can
# optionally load a checkpoint from the previous stage.
#
# Usage (conceptual):
#   python -m pilla_rl.train \
#       --config pilla_rl/configs/curricula/recovery_to_walk.yaml \
#       --num_envs 4096

stages:

  # Stage 1: train the recovery behaviour from scratch, exactly as the
  # task config specifies (no overrides).
  - name: "recovery"
    config: "pilla_rl/configs/tasks/recovery.yaml"
    max_iterations: 20000
    reward_overrides: {}
    command_overrides: {}

  # Stage 2: fine-tune the recovered policy into a walker, warm-started
  # from the stage-1 checkpoint.
  - name: "walk"
    config: "pilla_rl/configs/tasks/walk.yaml"
    max_iterations: 10000
    load_from: "previous"  # load checkpoint from the preceding stage
    reward_overrides:
      # gradually re-introduce locomotion rewards
      tracking_lin_vel: 1.0
      tracking_ang_vel: 0.5
    command_overrides:
      # sampled velocity-command ranges; presumably m/s and rad/s —
      # TODO confirm units against the env implementation
      lin_vel_x_range: [-1.0, 2.0]
      lin_vel_y_range: [-0.5, 0.5]
      ang_vel_range: [-0.5, 0.5]
118 changes: 118 additions & 0 deletions pilla_rl/configs/tasks/recovery.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Upside-down recovery task configuration
# Matches go2/upside_down_recovery/go2_train.py get_cfgs() / get_train_cfg()
# Requires: pyyaml

env_class: "pilla_rl.envs.recovery_env.RecoveryEnv"

# ---- Simulation / robot settings -------------------------------------------
env_cfg:
  num_actions: 12
  # Nominal joint targets (rad); policy actions are offsets from these,
  # scaled by action_scale below.
  default_joint_angles:
    FL_hip_joint: 0.0
    FR_hip_joint: 0.0
    RL_hip_joint: 0.0
    RR_hip_joint: 0.0
    FL_thigh_joint: 0.8
    FR_thigh_joint: 0.8
    RL_thigh_joint: 1.0
    RR_thigh_joint: 1.0
    FL_calf_joint: -1.5
    FR_calf_joint: -1.5
    RL_calf_joint: -1.5
    RR_calf_joint: -1.5
  # Actuated joint ordering — fixes the layout of the action and
  # joint-observation vectors.
  joint_names:
    - FR_hip_joint
    - FR_thigh_joint
    - FR_calf_joint
    - FL_hip_joint
    - FL_thigh_joint
    - FL_calf_joint
    - RR_hip_joint
    - RR_thigh_joint
    - RR_calf_joint
    - RL_hip_joint
    - RL_thigh_joint
    - RL_calf_joint
  kp: 20.0  # joint PD gains
  kd: 0.5
  # Termination thresholds (degrees). The wide roll limit keeps
  # upside-down poses from ending the episode — required for recovery.
  termination_if_roll_greater_than: 180
  termination_if_pitch_greater_than: 90
  base_init_pos: [0.0, 0.0, 0.42]
  base_init_quat: [1.0, 0.0, 0.0, 0.0]
  episode_length_s: 20.0
  resampling_time_s: 10.0
  action_scale: 0.3
  simulate_action_latency: true
  clip_actions: 100.0

# ---- Observation settings ---------------------------------------------------
obs_cfg:
  num_obs: 48
  obs_scales:
    lin_vel: 2.0
    ang_vel: 0.25
    dof_pos: 1.0
    dof_vel: 0.05

# ---- Reward settings --------------------------------------------------------
reward_cfg:
  tracking_sigma: 0.25
  base_height_target: 0.42
  feet_height_target: 0.075
  reward_scales:
    # Velocity tracking is zeroed out: command ranges below are all zero
    # for the recovery task.
    tracking_lin_vel: 0.0
    tracking_ang_vel: 0.0
    lin_vel_z: -1.0
    base_height: -2.0
    action_rate: -0.02
    similar_to_default: -0.1
    # Recovery-specific shaping terms (implemented in RecoveryEnv).
    upright_orientation: 20.0
    recovery_progress: 30.0
    minimize_base_roll: 15.0
    stability: 5.0
    legs_not_in_air: 8.0
    energy_efficiency: 3.0
    forward_progress: 2.0

# ---- Command sampling -------------------------------------------------------
command_cfg:
  num_commands: 3
  # All-zero ranges: no locomotion commands are issued during recovery.
  lin_vel_x_range: [0.0, 0.0]
  lin_vel_y_range: [0.0, 0.0]
  ang_vel_range: [0.0, 0.0]

# ---- PPO training settings (rsl_rl-style OnPolicyRunner) --------------------
train:
  exp_name: "go2-upside-down-recovery"
  algorithm:
    class_name: "PPO"
    clip_param: 0.2
    desired_kl: 0.01
    entropy_coef: 0.01
    gamma: 0.998
    lam: 0.95
    learning_rate: 0.0003
    max_grad_norm: 1.0
    num_learning_epochs: 10
    num_mini_batches: 4
    schedule: "adaptive"
    use_clipped_value_loss: true
    value_loss_coef: 1.0
  init_member_classes: {}
  policy:
    activation: "elu"
    actor_hidden_dims: [512, 256, 128]
    critic_hidden_dims: [512, 256, 128]
    init_noise_std: 1.0
    class_name: "ActorCritic"
  runner:
    checkpoint: -1
    experiment_name: "go2-upside-down-recovery"
    load_run: -1
    log_interval: 1
    max_iterations: 20000
    record_interval: -1
    resume: false
    resume_path: null
    run_name: ""
    logger: "tensorboard"
  runner_class_name: "OnPolicyRunner"
  num_steps_per_env: 24
  save_interval: 100
  empirical_normalization: null
  seed: 1
115 changes: 115 additions & 0 deletions pilla_rl/configs/tasks/standup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Standup task configuration
# Matches go2/standup_copilot/go2_train.py get_cfgs() / get_train_cfg()
# Requires: pyyaml

env_class: "pilla_rl.envs.standup_env.StandupEnv"

# ---- Simulation / robot settings -------------------------------------------
env_cfg:
  num_actions: 12
  # Nominal joint targets (rad); policy actions are offsets from these,
  # scaled by action_scale below.
  default_joint_angles:
    FL_hip_joint: 0.0
    FR_hip_joint: 0.0
    RL_hip_joint: 0.0
    RR_hip_joint: 0.0
    FL_thigh_joint: 0.8
    FR_thigh_joint: 0.8
    RL_thigh_joint: 1.0
    RR_thigh_joint: 1.0
    FL_calf_joint: -1.5
    FR_calf_joint: -1.5
    RL_calf_joint: -1.5
    RR_calf_joint: -1.5
  # Actuated joint ordering — fixes the layout of the action and
  # joint-observation vectors.
  joint_names:
    - FR_hip_joint
    - FR_thigh_joint
    - FR_calf_joint
    - FL_hip_joint
    - FL_thigh_joint
    - FL_calf_joint
    - RR_hip_joint
    - RR_thigh_joint
    - RR_calf_joint
    - RL_hip_joint
    - RL_thigh_joint
    - RL_calf_joint
  kp: 20.0  # joint PD gains
  kd: 0.5
  # Termination thresholds (degrees) — tighter than the recovery task,
  # since standup starts closer to upright.
  termination_if_roll_greater_than: 45
  termination_if_pitch_greater_than: 45
  base_init_pos: [0.0, 0.0, 0.42]
  base_init_quat: [1.0, 0.0, 0.0, 0.0]
  episode_length_s: 15.0
  resampling_time_s: 8.0
  action_scale: 0.25
  simulate_action_latency: true
  clip_actions: 100.0

# ---- Observation settings ---------------------------------------------------
obs_cfg:
  num_obs: 48
  obs_scales:
    lin_vel: 2.0
    ang_vel: 0.25
    dof_pos: 1.0
    dof_vel: 0.05

# ---- Reward settings --------------------------------------------------------
reward_cfg:
  tracking_sigma: 0.25
  base_height_target: 0.42
  feet_height_target: 0.075
  reward_scales:
    # Velocity tracking is zeroed out: command ranges below are all zero
    # for the standup task.
    tracking_lin_vel: 0.0
    tracking_ang_vel: 0.0
    lin_vel_z: -2.0
    base_height: -5.0
    action_rate: -0.01
    similar_to_default: -0.5
    # Standup-specific shaping terms (implemented in StandupEnv).
    upright_orientation: 15.0
    stability: 10.0
    stand_up_progress: 25.0
    joint_regularization: 2.0

# ---- Command sampling -------------------------------------------------------
command_cfg:
  num_commands: 3
  # All-zero ranges: no locomotion commands are issued during standup.
  lin_vel_x_range: [0.0, 0.0]
  lin_vel_y_range: [0.0, 0.0]
  ang_vel_range: [0.0, 0.0]

# ---- PPO training settings (rsl_rl-style OnPolicyRunner) --------------------
train:
  exp_name: "go2-standup"
  algorithm:
    class_name: "PPO"
    clip_param: 0.2
    desired_kl: 0.01
    entropy_coef: 0.005
    gamma: 0.99
    lam: 0.95
    learning_rate: 0.0005
    max_grad_norm: 1.0
    num_learning_epochs: 8
    num_mini_batches: 4
    schedule: "adaptive"
    use_clipped_value_loss: true
    value_loss_coef: 1.0
  init_member_classes: {}
  policy:
    activation: "elu"
    actor_hidden_dims: [512, 256, 128]
    critic_hidden_dims: [512, 256, 128]
    init_noise_std: 1.0
    class_name: "ActorCritic"
  runner:
    checkpoint: -1
    experiment_name: "go2-standup"
    load_run: -1
    log_interval: 1
    max_iterations: 15000
    record_interval: -1
    resume: false
    resume_path: null
    run_name: ""
    logger: "tensorboard"
  runner_class_name: "OnPolicyRunner"
  num_steps_per_env: 24
  save_interval: 100
  empirical_normalization: null
  seed: 1
Loading