Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pilla_rl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""pilla_rl — curriculum learning framework for Go2 quadruped robot tasks."""

__version__ = "0.1.0"
7 changes: 7 additions & 0 deletions pilla_rl/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Allow running the package as ``python -m pilla_rl.train`` or
``python -m pilla_rl`` (which defaults to the train entry point).
"""

from pilla_rl.train import main

main()
71 changes: 71 additions & 0 deletions pilla_rl/config_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""YAML configuration loader and environment factory.

Usage::

from pilla_rl.config_loader import load_task_config, instantiate_env

cfg = load_task_config("pilla_rl/configs/tasks/walk.yaml")
env = instantiate_env(cfg, num_envs=4096, show_viewer=False)

Dependencies: pyyaml (``pip install pyyaml``)
"""

import importlib
from pathlib import Path

import yaml


def load_task_config(config_path: str) -> dict:
    """Load a YAML task config file and return it as a plain dict.

    Parameters
    ----------
    config_path:
        Path to the ``.yaml`` file, either absolute or relative to the
        current working directory.

    Returns
    -------
    dict
        The parsed configuration.

    Raises
    ------
    FileNotFoundError
        If *config_path* does not exist.
    yaml.YAMLError
        If the file is not valid YAML.
    """
    # Open with explicit UTF-8: YAML files default to UTF-8, while the
    # platform locale encoding (the default for open()) varies and would
    # break configs containing non-ASCII comments on e.g. Windows.
    with Path(config_path).open("r", encoding="utf-8") as fh:
        return yaml.safe_load(fh)


def instantiate_env(config: dict, num_envs: int = 4096, show_viewer: bool = False):
    """Resolve the env class named in *config* and build an instance of it.

    ``config["env_class"]`` must be a fully-qualified dotted class name,
    e.g. ``"pilla_rl.envs.walk_env.WalkEnv"``; the containing module is
    imported dynamically and the class is called with the per-section
    config dicts.

    Parameters
    ----------
    config:
        Parsed task config dict (as returned by :func:`load_task_config`).
    num_envs:
        Number of parallel simulation environments.
    show_viewer:
        Whether to open the Genesis viewer window.

    Returns
    -------
    BaseQuadrupedEnv
        An instantiated environment object.
    """
    dotted_name: str = config["env_class"]
    # Split "pkg.module.Class" into its module path and class attribute.
    module_name, attr_name = dotted_name.rsplit(".", 1)
    env_cls = getattr(importlib.import_module(module_name), attr_name)

    kwargs = {
        "num_envs": num_envs,
        "env_cfg": config["env_cfg"],
        "obs_cfg": config["obs_cfg"],
        "reward_cfg": config["reward_cfg"],
        "command_cfg": config["command_cfg"],
        "show_viewer": show_viewer,
    }
    return env_cls(**kwargs)
31 changes: 31 additions & 0 deletions pilla_rl/configs/curricula/recovery_to_walk.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Curriculum: recovery → walk
#
# Example multi-stage curriculum that chains two task configs.
# Each stage can override reward scales and command ranges, and can
# optionally load a checkpoint from the previous stage.
#
# Usage (conceptual):
#   python -m pilla_rl.train \
#       --config pilla_rl/configs/curricula/recovery_to_walk.yaml \
#       --num_envs 4096

stages:

  # Stage 1: train the recovery behaviour from scratch, exactly as the
  # task config specifies (no overrides).
  - name: "recovery"
    config: "pilla_rl/configs/tasks/recovery.yaml"
    max_iterations: 20000
    reward_overrides: {}
    command_overrides: {}

  # Stage 2: fine-tune the recovered policy into a walker, warm-started
  # from the stage-1 checkpoint.
  - name: "walk"
    config: "pilla_rl/configs/tasks/walk.yaml"
    max_iterations: 10000
    load_from: "previous"  # load checkpoint from the preceding stage
    reward_overrides:
      # gradually re-introduce locomotion rewards
      tracking_lin_vel: 1.0
      tracking_ang_vel: 0.5
    command_overrides:
      # sampled velocity-command ranges; presumably m/s and rad/s —
      # TODO confirm units against the env implementation
      lin_vel_x_range: [-1.0, 2.0]
      lin_vel_y_range: [-0.5, 0.5]
      ang_vel_range: [-0.5, 0.5]
118 changes: 118 additions & 0 deletions pilla_rl/configs/tasks/recovery.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Upside-down recovery task configuration
# Matches go2/upside_down_recovery/go2_train.py get_cfgs() / get_train_cfg()
# Requires: pyyaml

env_class: "pilla_rl.envs.recovery_env.RecoveryEnv"

# ---- Simulation / robot settings -------------------------------------------
env_cfg:
  num_actions: 12
  # Nominal joint targets (rad); policy actions are offsets from these,
  # scaled by action_scale below.
  default_joint_angles:
    FL_hip_joint: 0.0
    FR_hip_joint: 0.0
    RL_hip_joint: 0.0
    RR_hip_joint: 0.0
    FL_thigh_joint: 0.8
    FR_thigh_joint: 0.8
    RL_thigh_joint: 1.0
    RR_thigh_joint: 1.0
    FL_calf_joint: -1.5
    FR_calf_joint: -1.5
    RL_calf_joint: -1.5
    RR_calf_joint: -1.5
  # Actuated joint ordering — fixes the layout of the action and
  # joint-observation vectors.
  joint_names:
    - FR_hip_joint
    - FR_thigh_joint
    - FR_calf_joint
    - FL_hip_joint
    - FL_thigh_joint
    - FL_calf_joint
    - RR_hip_joint
    - RR_thigh_joint
    - RR_calf_joint
    - RL_hip_joint
    - RL_thigh_joint
    - RL_calf_joint
  kp: 20.0  # joint PD gains
  kd: 0.5
  # Termination thresholds (degrees). The wide roll limit keeps
  # upside-down poses from ending the episode — required for recovery.
  termination_if_roll_greater_than: 180
  termination_if_pitch_greater_than: 90
  base_init_pos: [0.0, 0.0, 0.42]
  base_init_quat: [1.0, 0.0, 0.0, 0.0]
  episode_length_s: 20.0
  resampling_time_s: 10.0
  action_scale: 0.3
  simulate_action_latency: true
  clip_actions: 100.0

# ---- Observation settings ---------------------------------------------------
obs_cfg:
  num_obs: 48
  obs_scales:
    lin_vel: 2.0
    ang_vel: 0.25
    dof_pos: 1.0
    dof_vel: 0.05

# ---- Reward settings --------------------------------------------------------
reward_cfg:
  tracking_sigma: 0.25
  base_height_target: 0.42
  feet_height_target: 0.075
  reward_scales:
    # Velocity tracking is zeroed out: command ranges below are all zero
    # for the recovery task.
    tracking_lin_vel: 0.0
    tracking_ang_vel: 0.0
    lin_vel_z: -1.0
    base_height: -2.0
    action_rate: -0.02
    similar_to_default: -0.1
    # Recovery-specific shaping terms (implemented in RecoveryEnv).
    upright_orientation: 20.0
    recovery_progress: 30.0
    minimize_base_roll: 15.0
    stability: 5.0
    legs_not_in_air: 8.0
    energy_efficiency: 3.0
    forward_progress: 2.0

# ---- Command sampling -------------------------------------------------------
command_cfg:
  num_commands: 3
  # All-zero ranges: no locomotion commands are issued during recovery.
  lin_vel_x_range: [0.0, 0.0]
  lin_vel_y_range: [0.0, 0.0]
  ang_vel_range: [0.0, 0.0]

# ---- PPO training settings (rsl_rl-style OnPolicyRunner) --------------------
train:
  exp_name: "go2-upside-down-recovery"
  algorithm:
    class_name: "PPO"
    clip_param: 0.2
    desired_kl: 0.01
    entropy_coef: 0.01
    gamma: 0.998
    lam: 0.95
    learning_rate: 0.0003
    max_grad_norm: 1.0
    num_learning_epochs: 10
    num_mini_batches: 4
    schedule: "adaptive"
    use_clipped_value_loss: true
    value_loss_coef: 1.0
  init_member_classes: {}
  policy:
    activation: "elu"
    actor_hidden_dims: [512, 256, 128]
    critic_hidden_dims: [512, 256, 128]
    init_noise_std: 1.0
    class_name: "ActorCritic"
  runner:
    checkpoint: -1
    experiment_name: "go2-upside-down-recovery"
    load_run: -1
    log_interval: 1
    max_iterations: 20000
    record_interval: -1
    resume: false
    resume_path: null
    run_name: ""
    logger: "tensorboard"
  runner_class_name: "OnPolicyRunner"
  num_steps_per_env: 24
  save_interval: 100
  empirical_normalization: null
  seed: 1
115 changes: 115 additions & 0 deletions pilla_rl/configs/tasks/standup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Standup task configuration
# Matches go2/standup_copilot/go2_train.py get_cfgs() / get_train_cfg()
# Requires: pyyaml

env_class: "pilla_rl.envs.standup_env.StandupEnv"

# ---- Simulation / robot settings -------------------------------------------
env_cfg:
  num_actions: 12
  # Nominal joint targets (rad); policy actions are offsets from these,
  # scaled by action_scale below.
  default_joint_angles:
    FL_hip_joint: 0.0
    FR_hip_joint: 0.0
    RL_hip_joint: 0.0
    RR_hip_joint: 0.0
    FL_thigh_joint: 0.8
    FR_thigh_joint: 0.8
    RL_thigh_joint: 1.0
    RR_thigh_joint: 1.0
    FL_calf_joint: -1.5
    FR_calf_joint: -1.5
    RL_calf_joint: -1.5
    RR_calf_joint: -1.5
  # Actuated joint ordering — fixes the layout of the action and
  # joint-observation vectors.
  joint_names:
    - FR_hip_joint
    - FR_thigh_joint
    - FR_calf_joint
    - FL_hip_joint
    - FL_thigh_joint
    - FL_calf_joint
    - RR_hip_joint
    - RR_thigh_joint
    - RR_calf_joint
    - RL_hip_joint
    - RL_thigh_joint
    - RL_calf_joint
  kp: 20.0  # joint PD gains
  kd: 0.5
  # Termination thresholds (degrees) — tighter than the recovery task,
  # since standup starts closer to upright.
  termination_if_roll_greater_than: 45
  termination_if_pitch_greater_than: 45
  base_init_pos: [0.0, 0.0, 0.42]
  base_init_quat: [1.0, 0.0, 0.0, 0.0]
  episode_length_s: 15.0
  resampling_time_s: 8.0
  action_scale: 0.25
  simulate_action_latency: true
  clip_actions: 100.0

# ---- Observation settings ---------------------------------------------------
obs_cfg:
  num_obs: 48
  obs_scales:
    lin_vel: 2.0
    ang_vel: 0.25
    dof_pos: 1.0
    dof_vel: 0.05

# ---- Reward settings --------------------------------------------------------
reward_cfg:
  tracking_sigma: 0.25
  base_height_target: 0.42
  feet_height_target: 0.075
  reward_scales:
    # Velocity tracking is zeroed out: command ranges below are all zero
    # for the standup task.
    tracking_lin_vel: 0.0
    tracking_ang_vel: 0.0
    lin_vel_z: -2.0
    base_height: -5.0
    action_rate: -0.01
    similar_to_default: -0.5
    # Standup-specific shaping terms (implemented in StandupEnv).
    upright_orientation: 15.0
    stability: 10.0
    stand_up_progress: 25.0
    joint_regularization: 2.0

# ---- Command sampling -------------------------------------------------------
command_cfg:
  num_commands: 3
  # All-zero ranges: no locomotion commands are issued during standup.
  lin_vel_x_range: [0.0, 0.0]
  lin_vel_y_range: [0.0, 0.0]
  ang_vel_range: [0.0, 0.0]

# ---- PPO training settings (rsl_rl-style OnPolicyRunner) --------------------
train:
  exp_name: "go2-standup"
  algorithm:
    class_name: "PPO"
    clip_param: 0.2
    desired_kl: 0.01
    entropy_coef: 0.005
    gamma: 0.99
    lam: 0.95
    learning_rate: 0.0005
    max_grad_norm: 1.0
    num_learning_epochs: 8
    num_mini_batches: 4
    schedule: "adaptive"
    use_clipped_value_loss: true
    value_loss_coef: 1.0
  init_member_classes: {}
  policy:
    activation: "elu"
    actor_hidden_dims: [512, 256, 128]
    critic_hidden_dims: [512, 256, 128]
    init_noise_std: 1.0
    class_name: "ActorCritic"
  runner:
    checkpoint: -1
    experiment_name: "go2-standup"
    load_run: -1
    log_interval: 1
    max_iterations: 15000
    record_interval: -1
    resume: false
    resume_path: null
    run_name: ""
    logger: "tensorboard"
  runner_class_name: "OnPolicyRunner"
  num_steps_per_env: 24
  save_interval: 100
  empirical_normalization: null
  seed: 1
Loading