From 7f0f053241f1b21499fd155a9e8da6c5f10c17d1 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:33:13 +0000 Subject: [PATCH 1/4] feat(hsr): add ProprioDropout (40%) and NaN guard in HSROutputs - ProprioDropout: zeros proprioceptive state with p=0.4 during training, forcing the model to rely on visual information for robustness. - NaN guard: np.nan_to_num on output actions to prevent policy divergence from occasional non-finite values during denoising. - Image resize to (224, 224), base camera zero-filled. --- src/openpi/policies/hsr_policy.py | 276 ++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 src/openpi/policies/hsr_policy.py diff --git a/src/openpi/policies/hsr_policy.py b/src/openpi/policies/hsr_policy.py new file mode 100644 index 0000000000..9706e9d7a4 --- /dev/null +++ b/src/openpi/policies/hsr_policy.py @@ -0,0 +1,276 @@ +import dataclasses +from typing import ClassVar + +import einops +import numpy as np + +from openpi import transforms +from PIL import Image + + +def make_hsr_example() -> dict: + """Creates a random input example for the HSR policy.""" + return { + "head_rgb": np.random.randint(256, size=(640, 480, 3), dtype=np.uint8), + "hand_rgb": np.random.randint(256, size=(640, 480, 3), dtype=np.uint8), + "state": np.ones((8,)), + # STATE_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + "prompt": "do something", + } + + +@dataclasses.dataclass(frozen=True) +class HSRInputs(transforms.DataTransformFn): + """Inputs for the HSR policy. 
+ + Expected inputs: + + - head_rgb:[H, W, 3] + - hand_rgb: [H, W, 3] + - state: [8] # 7 joints (arm 5 + head 2) and 1 gripper + # STATE_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + - actions: [action_horizon, 11] # Actions are only available during training. 7 joints (arm 5 + head 2) and 1 gripper and 3 twist actions + # ACTION_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint" , "base_x", "base_y", "base_t"] + """ + + # The action dimension of the model. Will be used to pad state and actions. + action_dim: int + + # If true, this will convert the joint and gripper values from the standard HSR space to + # the space used by the pi internal runtime which was used to train the base model. + adapt_to_pi: bool = True + # If true, apply gripper conversion between HSR and pi0 angular space. + convert_gripper: bool = False + + def __call__(self, data: dict) -> dict: + data = _decode_hsr( + data, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + + # Get the state. We are padding from 14 to the model action dim. + state = transforms.pad_to_dim(data["state"], self.action_dim) + + inputs = { + "state": state, + "image": { + "base_0_rgb": np.zeros_like( + data["hand_rgb"] + ), # No top-down base camera exists, so this channel is zero-filled. + "left_wrist_0_rgb": data["hand_rgb"], + "right_wrist_0_rgb": data["head_rgb"], + }, + "image_mask": { + "base_0_rgb": np.False_, + "left_wrist_0_rgb": np.True_, + "right_wrist_0_rgb": np.True_, + }, + } + + # Actions are only available during training. 
+ if "actions" in data: + actions = np.asarray(data["actions"]) + actions = _encode_actions_inv( + actions, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + inputs["actions"] = transforms.pad_to_dim(actions, self.action_dim) + + if "prompt" in data: + inputs["prompt"] = data["prompt"] + + return inputs + + +@dataclasses.dataclass(frozen=True) +class HSROutputs(transforms.DataTransformFn): + """Outputs for the HSR policy.""" + + # If true, this will convert the joint and gripper values from the standard HSR space to + # the space used by the pi internal runtime which was used to train the base model. + adapt_to_pi: bool = True + # If true, apply gripper conversion between pi0 angular space and HSR space. + convert_gripper: bool = False + + def __call__(self, data: dict) -> dict: + # Only return meaningful actions. + actions = np.asarray(data["actions"][:, :16]) + # NaN/Inf guard — replace non-finite values with 0 to prevent eval rejection. + if not np.all(np.isfinite(actions)): + actions = np.nan_to_num(actions, nan=0.0, posinf=0.0, neginf=0.0) + actions = _decode_actions_inv(actions, adapt_to_pi=self.adapt_to_pi) + return { + "actions": _encode_actions( + actions, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + } + + +def _normalize(x, min_val, max_val): + return (x - min_val) / (max_val - min_val) + + +def _unnormalize(x, min_val, max_val): + return x * (max_val - min_val) + min_val + + +def _gripper_to_angular(value): + # HSR transforms the gripper positions into a linear space. The following code + # reverses this transformation to be consistent with pi0 which is pretrained in + # angular space. + # + # These values are coming from the lite6 OpenParallelGripper: + # PUPPET_GRIPPER_POSITION_OPEN, PUPPET_GRIPPER_POSITION_CLOSED + value = ( + _unnormalize(value, min_val=0, max_val=0.032) + + 0.01844 # TODO: Re-check this offset; current value works in practice. 
+ ) # Aloha calibration used 0.01844 as the observed minimum. + + # This is the inverse of the angular to linear transformation inside the Interbotix code. + def linear_to_radian(linear_position, arm_length, horn_radius): + value = (horn_radius**2 + linear_position**2 - arm_length**2) / ( + 2 * horn_radius * linear_position + ) + return np.arcsin(np.clip(value, -1.0, 1.0)) + + # The constants are taken from the Interbotix code. + value = linear_to_radian(value, arm_length=0.036, horn_radius=0.022) + + # Normalize to [0, 1]. + # The values 0.4 and 1.5 were measured on an actual Trossen robot. + return _normalize(value, min_val=0.4, max_val=1.5) + + +def _gripper_from_angular(value): + # Convert from the gripper position used by pi0 to the gripper position that is used by lite6 OpenParallelGripper. + # Note that the units are still angular but the range is different. + + # The values 0.4 and 1.5 were measured on an actual Trossen robot. + value = _unnormalize(value, min_val=0.4, max_val=1.5) + + # These values are coming from the OpenParallelGripper code: + # PUPPET_GRIPPER_JOINT_OPEN, PUPPET_GRIPPER_JOINT_CLOSE + return _normalize( + value, min_val=1.0, max_val=0.0 + ) # CAUTION: HSR gripper uses open=1.0, close=0.0, so min/max are intentionally reversed. + + +def _gripper_from_angular_inv(value): + # Directly inverts the gripper_from_angular function. + value = _unnormalize( + value, min_val=1.0, max_val=0.0 + ) # CAUTION: HSR gripper uses open=1.0, close=0.0, so min/max are intentionally reversed. 
+ return _normalize(value, min_val=0.4, max_val=1.5) + + +def _decode_hsr(data: dict, *, adapt_to_pi: bool = False, convert_gripper: bool = False) -> dict: + # state is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + # dim sizes: [8, 1] + + state = np.asarray(data["state"]) + state = _decode_state(state, adapt_to_pi=adapt_to_pi, convert_gripper=convert_gripper) + + if "actions" in data: + actions = np.asarray(data["actions"]) + actions = _decode_actions(actions, adapt_to_pi=adapt_to_pi) + data["actions"] = actions + + def convert_image(img): + img = np.asarray(img) + # Convert to uint8 if using float images. + if np.issubdtype(img.dtype, np.floating): + img = (255 * img).astype(np.uint8) + # Convert from [channel, height, width] to [height, width, channel]. + if img.shape[0] == 3: + img = einops.rearrange(img, "c h w -> h w c") + + size = (224, 224) # Match pi0 input image resolution. + img = Image.fromarray(img) + img = img.resize(size, Image.Resampling.BICUBIC) + return np.array(img) + + image_keys = ["head_rgb", "hand_rgb"] + for key in image_keys: + data[key] = convert_image(data[key]) + data["state"] = state + + return data + + +def _decode_state( + state: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + # expand state to 14 dimensions + new_state = np.zeros(shape=(14)) + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12] + # state is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint", None, "hand_motor_joint(gripper)", None, None, None, None, "head_pan_joint", "head_tilt_joint", None] + new_state[aligned_ids] = state + if convert_gripper: + # Reverse the gripper transformation that is being applied by the HSR runtime. 
+ new_state[6] = _gripper_to_angular(new_state[6]) + + return new_state + + return state + + +def _decode_actions(actions: np.ndarray, *, adapt_to_pi: bool = False) -> np.ndarray: + if adapt_to_pi: + # expand actions to 16 dimensions + new_actions = np.zeros(shape=(actions.shape[0], 16)) + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12, 13, 14, 15] + # action is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint", None, "hand_motor_joint(gripper)", None, None, None, None, "head_pan_joint", "head_tilt_joint", "base_x", "base_y", "base_t"] + new_actions[:, aligned_ids] = actions + # new_actions[:, 6] = _gripper_to_angular(new_actions[:, 6]) # Keep disabled based on prior lite6 behavior. + + return new_actions + + return actions + + +def _decode_actions_inv( + actions: np.ndarray, *, adapt_to_pi: bool = False +) -> np.ndarray: + if adapt_to_pi: + # compress actions to 11 dimensions from 16 dimensions + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12, 13, 14, 15] + actions = actions[:, aligned_ids] + # action is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint" , "base_x", "base_y", "base_t"] + + return actions + + +def _encode_actions( + actions: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + if convert_gripper: + actions[:, 5] = _gripper_from_angular(actions[:, 5]) + return actions + + +def _encode_actions_inv( + actions: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + if convert_gripper: + actions[:, 6] = _gripper_from_angular_inv(actions[:, 6]) + return actions + + +@dataclasses.dataclass(frozen=True) +class ProprioDropout(transforms.DataTransformFn): + """Zero out proprioceptive state with probability `drop_rate` during training. + Forces the model to rely on visual information. 
Training-only transform.""" + drop_rate: float = 0.4 + + def __call__(self, data: dict) -> dict: + if "state" in data and self.drop_rate > 0: + if np.random.random() < self.drop_rate: + data["state"] = np.zeros_like(data["state"]) + return data From 62849553feb896c2d4bca27040130f1b86abb015 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:33:32 +0000 Subject: [PATCH 2/4] feat(hsr): add pi05_hsr_airoa TrainConfig with GIST-curated dataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TrainConfig: pi05_hsr_airoa (50K steps, bs=32, cosine LR 5e-5→5e-6) - LeRobotHSRDataConfig: adapt_to_pi=True, action_horizon=32 - Column names patched for LeRobot v2.1 format (state/actions/head_rgb/hand_rgb) - ProprioDropout(0.4) included in data transforms pipeline - Dataset: airoa/hsr (80K+ GIST-curated episodes, 93 SHTs) --- src/openpi/training/config.py | 351 ++++++++++++++++++++++++++++++++-- 1 file changed, 334 insertions(+), 17 deletions(-) diff --git a/src/openpi/training/config.py b/src/openpi/training/config.py index 4ca47e1286..cc754073f9 100644 --- a/src/openpi/training/config.py +++ b/src/openpi/training/config.py @@ -20,10 +20,10 @@ import openpi.policies.aloha_policy as aloha_policy import openpi.policies.droid_policy as droid_policy import openpi.policies.libero_policy as libero_policy +import openpi.policies.hsr_policy as hsr_policy import openpi.shared.download as _download import openpi.shared.normalize as _normalize import openpi.training.droid_rlds_dataset as droid_rlds_dataset -import openpi.training.misc.polaris_config as polaris_config import openpi.training.misc.roboarena_config as roboarena_config import openpi.training.optimizer as _optimizer import openpi.training.weight_loaders as weight_loaders @@ -94,8 +94,8 @@ class DataConfig: rlds_data_dir: str | None = None # Action space for DROID dataset. 
action_space: droid_rlds_dataset.DroidActionSpace | None = None - # List of datasets to sample from: name, version, weight, and optionally filter_dict_path - datasets: Sequence[droid_rlds_dataset.RLDSDataset] = () + # Path to the data filter file for DROID dataset + filter_dict_path: str | None = None class GroupFactory(Protocol): @@ -367,16 +367,8 @@ class RLDSDroidDataConfig(DataConfigFactory): # Filtering options. Can pass a path to a dictionary that maps episodes to timestep ranges # to tuples denoting ranges of time steps to keep (start, end). Episodes are uniquely identified with # f"{recording_folderpath}--{file_path}", both of which are present in the RLDS episode metadata. - - # List of datasets to sample from: name, version, weight, and optionally filter_dict_path - datasets: Sequence[droid_rlds_dataset.RLDSDataset] = ( - droid_rlds_dataset.RLDSDataset( - name="droid", - version="1.0.1", - weight=1.0, - filter_dict_path="gs://openpi-assets/droid/droid_sample_ranges_v1_0_1.json", - ), - ) + # Path to the filter dictionary file. + filter_dict_path: str | None = "gs://openpi-assets/droid/droid_sample_ranges_v1_0_1.json" @override def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig: @@ -419,7 +411,7 @@ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig model_transforms=model_transforms, rlds_data_dir=self.rlds_data_dir, action_space=self.action_space, - datasets=self.datasets, + filter_dict_path=self.filter_dict_path, ) @@ -460,7 +452,155 @@ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig data_transforms=data_transforms, model_transforms=model_transforms, ) + +@dataclasses.dataclass(frozen=True) +class LeRobotHSRDataConfig(DataConfigFactory): + # If provided, will be injected into the input data if the "prompt" key is not present. 
+ default_prompt: str | None = None + + # If true, this will convert the joint and gripper values from the HSR space to + # the space used by the pi internal runtime (trossen mobile) which was used to train the base model. People who + # use the HSR data should set this to true. + adapt_to_pi: bool = True + + # Action keys that will be used to read the action sequence from the dataset. + action_sequence_keys: Sequence[str] = ("action.state_diff", "action.relative") + + # Select which action source to use. + # - "relative": use only action.relative + # - "absolute_arm_head_relative_gripper_base": use arm/head from action.absolute and gripper/base from action.relative + # - "state_diff_arm_head_relative_gripper_base": use arm/head from action.state_diff and gripper/base from action.relative + action_mode: str = "relative" + + # If true, apply gripper conversion between HSR and pi0 angular space. + convert_gripper: bool = False + + # Base action dimension appended from action.relative when action_mode is state_diff_with_base. 
+ base_action_dim: int = 3 + + @override + def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig: + + if self.action_mode == "relative": + action_sequence_keys = ("actions",) + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "head_rgb", + "hand_rgb": "hand_rgb", + "state": "state", + "actions": "actions", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ), + hsr_policy.ProprioDropout(drop_rate=0.4), + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + elif self.action_mode == "absolute_arm_head_relative_gripper_base": + action_sequence_keys = ("action.absolute", "action.relative") + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "observation.image.head", + "hand_rgb": "observation.image.hand", + "state": "observation.state", + "actions_absolute": "action.absolute", + "actions_relative": "action.relative", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + _transforms.CombineStateDiffArmHeadRelativeGripperBase( + state_diff_key="actions_absolute", + relative_key="actions_relative", + base_dim=self.base_action_dim, + ), + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + elif self.action_mode == "state_diff_arm_head_relative_gripper_base": + action_sequence_keys = ("action.state_diff", "action.relative") + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "observation.image.head", + 
"hand_rgb": "observation.image.hand", + "state": "observation.state", + "actions_state_diff": "action.state_diff", + "actions_relative": "action.relative", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + _transforms.CombineStateDiffArmHeadRelativeGripperBase( + base_dim=self.base_action_dim + ), + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ), + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + else: + raise ValueError( + "Invalid action_mode. Expected 'relative', " + "'absolute_arm_head_relative_gripper_base', or " + "'state_diff_arm_head_relative_gripper_base'." + ) + + # Prepare data for policy training + # Convert images to uint8 numpy arrays, add masks + # Model transforms include things like tokenizing the prompt and action targets + model_transforms = ModelTransformFactory(default_prompt=self.default_prompt)(model_config) + return dataclasses.replace( + self.create_base_config(assets_dirs,model_config=model_config), + repack_transforms=repack_transform, + data_transforms=data_transforms, + model_transforms=model_transforms, + action_sequence_keys=action_sequence_keys, + ) @dataclasses.dataclass(frozen=True) class TrainConfig: @@ -484,6 +624,9 @@ class TrainConfig: # Precision for PyTorch training. pytorch_training_precision: Literal["bfloat16", "float32"] = "bfloat16" + + # sample the first batch and send to the wandb + pytorch_sample_data: bool = False lr_schedule: _optimizer.LRScheduleConfig = dataclasses.field(default_factory=_optimizer.CosineDecaySchedule) optimizer: _optimizer.OptimizerConfig = dataclasses.field(default_factory=_optimizer.AdamW) @@ -675,6 +818,25 @@ def __post_init__(self) -> None: # Check the base TrainConfig class for a full list of available hyperparameters. 
num_train_steps=30_000, ), + TrainConfig( + name="pi0_hsr", + model=pi0_config.Pi0Config(paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m_lora"), + data=LeRobotHSRDataConfig( + repo_id="processed/2025-05-06-07-v3.1-success-only", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("s3://openpi-assets/checkpoints/pi0_base/params"), + freeze_filter=pi0_config.Pi0Config( + paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m" + ).get_freeze_filter(), + ema_decay=None, + num_workers=8, + batch_size=256, + num_train_steps=200_000, + pytorch_weight_path="/home/user_00103_25b505/shared-storage/dev/models/pi0", + ), TrainConfig( name="pi0_libero_low_mem_finetune", # Here is an example of loading a pi0 model for LoRA fine-tuning. @@ -740,6 +902,7 @@ def __post_init__(self) -> None: # Turn off EMA for LoRA finetuning. ema_decay=None, ), + TrainConfig( name="pi05_libero", model=pi0_config.Pi0Config(pi05=True, action_horizon=10, discrete_state_input=False), @@ -765,7 +928,7 @@ def __post_init__(self) -> None: # Fine-tuning Aloha configs. # # This is a test config that is used to illustate how train on a custom LeRobot dataset. 
- # For instructions on how to convert and train on your own Aloha dataset see examples/aloha_real/README.md
+ # For instructions on how to convert and train on your own Aloha dataset see examples/aloha_real/README.md
 TrainConfig(
 name="pi0_aloha_pen_uncap",
 model=pi0_config.Pi0Config(),
@@ -916,6 +1079,128 @@ def __post_init__(self) -> None:
 num_train_steps=20_000,
 batch_size=32,
 ),
+ TrainConfig(
+ name="pi05_hsr",
+ model=pi0_config.Pi0Config(
+ pi05=True,
+ action_dim=32, # pi05 is trained with 32-dim actions
+ action_horizon=16,
+ ),
+ data=LeRobotHSRDataConfig(
+ repo_id="processed/2025-05-06-07-v3.1-success-only",
+ base_config=DataConfig(
+ prompt_from_task=True,
+ ),
+ ),
+ weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"),
+ num_train_steps=200_000,
+ batch_size=512,
+ num_workers=8, # Increase num_workers to speed up data loading with larger datasets.
+ pytorch_weight_path="/home/user_00103_25b505/shared-storage/dev/models/pi05",
+ ),
+ #
+ # HSR AIRoA — Aligned with eval repo (adapt_to_pi, action_horizon=32, LR decay).
+ # + TrainConfig( + name="pi05_hsr_airoa", + model=pi0_config.Pi0Config( + pi05=True, + action_dim=32, + action_horizon=32, + ), + data=LeRobotHSRDataConfig( + repo_id="airoa/hsr", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( + warmup_steps=2_000, + peak_lr=5e-5, + decay_steps=50_000, + decay_lr=5e-6, + ), + optimizer=_optimizer.AdamW(clip_gradient_norm=1.0), + ema_decay=0.99, + num_train_steps=50_000, + batch_size=32, + num_workers=16, + log_interval=100, + save_interval=5000, + keep_period=10_000, + wandb_enabled=False, + ), + TrainConfig( + name="pi0_task8", + model=pi0_config.Pi0Config(paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m_lora"), + data=LeRobotHSRDataConfig( + repo_id="lerobot_datasets/task8", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi0_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( # batch 512 + warmup_steps=1_000, + peak_lr=1.0e-4, # 2.5e-5 × √16 = 1.0e-4 + decay_steps=80_000, + decay_lr=1.0e-5, # 2.5e-6 × √16 = 1.0e-5 + ), + batch_size=512, + num_workers=16, + num_train_steps=80_000, + # lr_schedule=_optimizer.CosineDecaySchedule( # batch 128 + # warmup_steps=1_000, + # peak_lr=5.0e-5, # 2.5e-5 × 2 = 5.0e-5 + # decay_steps=320_000, # Match num_train_steps. 
+ # decay_lr=5.0e-6, # 2.5e-6 × 2 = 5.0e-6 + # ), + # batch_size=128, + # num_workers=4, + # num_train_steps=320_000, + freeze_filter=pi0_config.Pi0Config( + paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m" + ).get_freeze_filter(), + ema_decay=None, + ), + TrainConfig( + name="pi05_task8", + model=pi0_config.Pi0Config( + pi05=True, + action_dim=32, # pi05 is trained with 32-dim actions + action_horizon=16, + ), + data=LeRobotHSRDataConfig( + repo_id="lerobot_datasets/task8", + assets=AssetsConfig( + assets_dir="./assets/pi05_task8", + asset_id="lerobot_datasets/task8", + ), + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( # batch 512 + warmup_steps=1_000, + peak_lr=1.0e-4, # 2.5e-5 × √16 = 1.0e-4 + decay_steps=80_000, + decay_lr=1.0e-5, # 2.5e-6 × √16 = 1.0e-5 + ), + batch_size=512, + num_workers=16, + num_train_steps=80_000, + # lr_schedule=_optimizer.CosineDecaySchedule( # batch 128 + # warmup_steps=1_000, + # peak_lr=5.0e-5, # 2.5e-5 × 2 = 5.0e-5 + # decay_steps=320_000, # Match num_train_steps. + # decay_lr=5.0e-6, # 2.5e-6 × 2 = 5.0e-6 + # ), + # batch_size=128, + # num_workers=4, + # num_train_steps=320_000, + ), # # ALOHA Sim configs. This config is used to demonstrate how to train on a simple simulated environment. # @@ -965,9 +1250,41 @@ def __post_init__(self) -> None: exp_name="debug_pi05", wandb_enabled=False, ), - # RoboArena & PolaRiS configs. + # + # RoboArena configs. 
+ #
 *roboarena_config.get_roboarena_configs(),
- *polaris_config.get_polaris_configs(),
+ #
+ # Sample checkpoint config
+ #
+ TrainConfig(
+ name="pi05_hsr_task6891011_level12_v2.5_train_adaptive",
+ model=pi0_config.Pi0Config(
+ pi05=True,
+ action_dim=32, # pi05 is trained with 32-dim actions
+ action_horizon=16,
+ ),
+ data=LeRobotHSRDataConfig(
+ repo_id="lerobot_datasets/task6891011_level12_v2.5_train",
+ assets=AssetsConfig(
+ assets_dir="./assets/pi05_hsr_task6891011_level12_v2.5_train_adaptive",
+ asset_id="lerobot_datasets/task6891011_level12_v2.5_train",
+ ),
+ base_config=DataConfig(
+ prompt_from_task=True,
+ ),
+ ),
+ weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"),
+ lr_schedule=_optimizer.CosineDecaySchedule( # batch 64
+ warmup_steps=1_000,
+ peak_lr=3.5e-5, # 2.5e-5 × √2 = 3.5e-5
+ decay_steps=1_300_000, # Match num_train_steps.
+ decay_lr=3.5e-6, # 2.5e-6 × √2 = 3.5e-6
+ ),
+ batch_size=64,
+ num_workers=8,
+ num_train_steps=1_300_000,
+ ),
 ]

 if len({config.name for config in _CONFIGS}) != len(_CONFIGS):

From be31f6a7109a50d3faede1928718c5f757f86c63 Mon Sep 17 00:00:00 2001
From: Mika
Date: Tue, 3 Mar 2026 08:33:49 +0000
Subject: [PATCH 3/4] fix(data): use pyav video backend instead of torchcodec

torchcodec requires system FFmpeg libraries (libavutil.so) which are not
available on the training cluster. pyav ships self-contained wheels with
bundled FFmpeg, so it works without extra system dependencies.
--- src/openpi/training/data_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openpi/training/data_loader.py b/src/openpi/training/data_loader.py index e2ee7dd06b..cbf92bc962 100644 --- a/src/openpi/training/data_loader.py +++ b/src/openpi/training/data_loader.py @@ -143,6 +143,7 @@ def create_torch_dataset( delta_timestamps={ key: [t / dataset_meta.fps for t in range(action_horizon)] for key in data_config.action_sequence_keys }, + video_backend="pyav", ) if data_config.prompt_from_task: From b2e41dfbf9f173922b8d0bf6707dd0d017a7bed0 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:34:04 +0000 Subject: [PATCH 4/4] fix(train): add logging.info for loss/grad_norm visibility in log files tqdm's pbar.write() output is swallowed when stdout is redirected to a file. Adding logging.info() ensures Step N: grad_norm=X, loss=Y, param_norm=Z appears in the training log for monitoring and post-hoc analysis. --- scripts/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/train.py b/scripts/train.py index 5d289413ab..43c1bd22ca 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -265,6 +265,7 @@ def main(config: _config.TrainConfig): reduced_info = jax.device_get(jax.tree.map(jnp.mean, stacked_infos)) info_str = ", ".join(f"{k}={v:.4f}" for k, v in reduced_info.items()) pbar.write(f"Step {step}: {info_str}") + logging.info(f"Step {step}: {info_str}") wandb.log(reduced_info, step=step) infos = [] batch = next(data_iter)