From 7f0f053241f1b21499fd155a9e8da6c5f10c17d1 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:33:13 +0000 Subject: [PATCH 1/4] feat(hsr): add ProprioDropout (40%) and NaN guard in HSROutputs - ProprioDropout: zeros proprioceptive state with p=0.4 during training, forcing the model to rely on visual information for robustness. - NaN guard: np.nan_to_num on output actions to prevent policy divergence from occasional non-finite values during denoising. - Image resize to (224, 224), base camera zero-filled. --- src/openpi/policies/hsr_policy.py | 276 ++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 src/openpi/policies/hsr_policy.py diff --git a/src/openpi/policies/hsr_policy.py b/src/openpi/policies/hsr_policy.py new file mode 100644 index 0000000000..9706e9d7a4 --- /dev/null +++ b/src/openpi/policies/hsr_policy.py @@ -0,0 +1,276 @@ +import dataclasses +from typing import ClassVar + +import einops +import numpy as np + +from openpi import transforms +from PIL import Image + + +def make_hsr_example() -> dict: + """Creates a random input example for the HSR policy.""" + return { + "head_rgb": np.random.randint(256, size=(640, 480, 3), dtype=np.uint8), + "hand_rgb": np.random.randint(256, size=(640, 480, 3), dtype=np.uint8), + "state": np.ones((8,)), + # STATE_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + "prompt": "do something", + } + + +@dataclasses.dataclass(frozen=True) +class HSRInputs(transforms.DataTransformFn): + """Inputs for the HSR policy. 
+ + Expected inputs: + + - head_rgb:[H, W, 3] + - hand_rgb: [H, W, 3] + - state: [8] # 7 joints (arm 5 + head 2) and 1 gripper + # STATE_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + - actions: [action_horizon, 11] # Actions are only available during training. 7 joints (arm 5 + head 2) and 1 gripper and 3 twist actions + # ACTION_NAMES = ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint" , "base_x", "base_y", "base_t"] + """ + + # The action dimension of the model. Will be used to pad state and actions. + action_dim: int + + # If true, this will convert the joint and gripper values from the standard HSR space to + # the space used by the pi internal runtime which was used to train the base model. + adapt_to_pi: bool = True + # If true, apply gripper conversion between HSR and pi0 angular space. + convert_gripper: bool = False + + def __call__(self, data: dict) -> dict: + data = _decode_hsr( + data, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + + # Get the state. We are padding from 14 to the model action dim. + state = transforms.pad_to_dim(data["state"], self.action_dim) + + inputs = { + "state": state, + "image": { + "base_0_rgb": np.zeros_like( + data["hand_rgb"] + ), # No top-down base camera exists, so this channel is zero-filled. + "left_wrist_0_rgb": data["hand_rgb"], + "right_wrist_0_rgb": data["head_rgb"], + }, + "image_mask": { + "base_0_rgb": np.False_, + "left_wrist_0_rgb": np.True_, + "right_wrist_0_rgb": np.True_, + }, + } + + # Actions are only available during training. 
+ if "actions" in data: + actions = np.asarray(data["actions"]) + actions = _encode_actions_inv( + actions, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + inputs["actions"] = transforms.pad_to_dim(actions, self.action_dim) + + if "prompt" in data: + inputs["prompt"] = data["prompt"] + + return inputs + + +@dataclasses.dataclass(frozen=True) +class HSROutputs(transforms.DataTransformFn): + """Outputs for the HSR policy.""" + + # If true, this will convert the joint and gripper values from the standard HSR space to + # the space used by the pi internal runtime which was used to train the base model. + adapt_to_pi: bool = True + # If true, apply gripper conversion between pi0 angular space and HSR space. + convert_gripper: bool = False + + def __call__(self, data: dict) -> dict: + # Only return meaningful actions. + actions = np.asarray(data["actions"][:, :16]) + # NaN/Inf guard — replace non-finite values with 0 to prevent eval rejection. + if not np.all(np.isfinite(actions)): + actions = np.nan_to_num(actions, nan=0.0, posinf=0.0, neginf=0.0) + actions = _decode_actions_inv(actions, adapt_to_pi=self.adapt_to_pi) + return { + "actions": _encode_actions( + actions, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + } + + +def _normalize(x, min_val, max_val): + return (x - min_val) / (max_val - min_val) + + +def _unnormalize(x, min_val, max_val): + return x * (max_val - min_val) + min_val + + +def _gripper_to_angular(value): + # HSR transforms the gripper positions into a linear space. The following code + # reverses this transformation to be consistent with pi0 which is pretrained in + # angular space. + # + # These values are coming from the lite6 OpenParallelGripper: + # PUPPET_GRIPPER_POSITION_OPEN, PUPPET_GRIPPER_POSITION_CLOSED + value = ( + _unnormalize(value, min_val=0, max_val=0.032) + + 0.01844 # TODO: Re-check this offset; current value works in practice. 
+ ) # Aloha calibration used 0.01844 as the observed minimum. + + # This is the inverse of the angular to linear transformation inside the Interbotix code. + def linear_to_radian(linear_position, arm_length, horn_radius): + value = (horn_radius**2 + linear_position**2 - arm_length**2) / ( + 2 * horn_radius * linear_position + ) + return np.arcsin(np.clip(value, -1.0, 1.0)) + + # The constants are taken from the Interbotix code. + value = linear_to_radian(value, arm_length=0.036, horn_radius=0.022) + + # Normalize to [0, 1]. + # The values 0.4 and 1.5 were measured on an actual Trossen robot. + return _normalize(value, min_val=0.4, max_val=1.5) + + +def _gripper_from_angular(value): + # Convert from the gripper position used by pi0 to the gripper position that is used by lite6 OpenParallelGripper. + # Note that the units are still angular but the range is different. + + # The values 0.4 and 1.5 were measured on an actual Trossen robot. + value = _unnormalize(value, min_val=0.4, max_val=1.5) + + # These values are coming from the OpenParallelGripper code: + # PUPPET_GRIPPER_JOINT_OPEN, PUPPET_GRIPPER_JOINT_CLOSE + return _normalize( + value, min_val=1.0, max_val=0.0 + ) # CAUTION: HSR gripper uses open=1.0, close=0.0, so min/max are intentionally reversed. + + +def _gripper_from_angular_inv(value): + # Directly inverts the gripper_from_angular function. + value = _unnormalize( + value, min_val=1.0, max_val=0.0 + ) # CAUTION: HSR gripper uses open=1.0, close=0.0, so min/max are intentionally reversed. 
+ return _normalize(value, min_val=0.4, max_val=1.5) + + +def _decode_hsr(data: dict, *, adapt_to_pi: bool = False, convert_gripper: bool = False) -> dict: + # state is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint"] + # dim sizes: [8, 1] + + state = np.asarray(data["state"]) + state = _decode_state(state, adapt_to_pi=adapt_to_pi, convert_gripper=convert_gripper) + + if "actions" in data: + actions = np.asarray(data["actions"]) + actions = _decode_actions(actions, adapt_to_pi=adapt_to_pi) + data["actions"] = actions + + def convert_image(img): + img = np.asarray(img) + # Convert to uint8 if using float images. + if np.issubdtype(img.dtype, np.floating): + img = (255 * img).astype(np.uint8) + # Convert from [channel, height, width] to [height, width, channel]. + if img.shape[0] == 3: + img = einops.rearrange(img, "c h w -> h w c") + + size = (224, 224) # Match pi0 input image resolution. + img = Image.fromarray(img) + img = img.resize(size, Image.Resampling.BICUBIC) + return np.array(img) + + image_keys = ["head_rgb", "hand_rgb"] + for key in image_keys: + data[key] = convert_image(data[key]) + data["state"] = state + + return data + + +def _decode_state( + state: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + # expand state to 14 dimensions + new_state = np.zeros(shape=(14)) + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12] + # state is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint", None, "hand_motor_joint(gripper)", None, None, None, None, "head_pan_joint", "head_tilt_joint", None] + new_state[aligned_ids] = state + if convert_gripper: + # Reverse the gripper transformation that is being applied by the HSR runtime. 
+ new_state[6] = _gripper_to_angular(new_state[6]) + + return new_state + + return state + + +def _decode_actions(actions: np.ndarray, *, adapt_to_pi: bool = False) -> np.ndarray: + if adapt_to_pi: + # expand actions to 16 dimensions + new_actions = np.zeros(shape=(actions.shape[0], 16)) + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12, 13, 14, 15] + # action is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint", None, "hand_motor_joint(gripper)", None, None, None, None, "head_pan_joint", "head_tilt_joint", "base_x", "base_y", "base_t"] + new_actions[:, aligned_ids] = actions + # new_actions[:, 6] = _gripper_to_angular(new_actions[:, 6]) # Keep disabled based on prior lite6 behavior. + + return new_actions + + return actions + + +def _decode_actions_inv( + actions: np.ndarray, *, adapt_to_pi: bool = False +) -> np.ndarray: + if adapt_to_pi: + # compress actions to 11 dimensions from 16 dimensions + aligned_ids = [0, 1, 2, 3, 4, 6, 11, 12, 13, 14, 15] + actions = actions[:, aligned_ids] + # action is ["arm_lift_joint", "arm_flex_joint", "arm_roll_joint", "wrist_flex_joint", "wrist_roll_joint","hand_motor_joint(gripper)", "head_pan_joint", "head_tilt_joint" , "base_x", "base_y", "base_t"] + + return actions + + +def _encode_actions( + actions: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + if convert_gripper: + actions[:, 5] = _gripper_from_angular(actions[:, 5]) + return actions + + +def _encode_actions_inv( + actions: np.ndarray, *, adapt_to_pi: bool = False, convert_gripper: bool = False +) -> np.ndarray: + if adapt_to_pi: + if convert_gripper: + actions[:, 6] = _gripper_from_angular_inv(actions[:, 6]) + return actions + + +@dataclasses.dataclass(frozen=True) +class ProprioDropout(transforms.DataTransformFn): + """Zero out proprioceptive state with probability `drop_rate` during training. + Forces the model to rely on visual information. 
Training-only transform.""" + drop_rate: float = 0.4 + + def __call__(self, data: dict) -> dict: + if "state" in data and self.drop_rate > 0: + if np.random.random() < self.drop_rate: + data["state"] = np.zeros_like(data["state"]) + return data From 62849553feb896c2d4bca27040130f1b86abb015 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:33:32 +0000 Subject: [PATCH 2/4] feat(hsr): add pi05_hsr_airoa TrainConfig with GIST-curated dataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TrainConfig: pi05_hsr_airoa (50K steps, bs=32, cosine LR 5e-5→5e-6) - LeRobotHSRDataConfig: adapt_to_pi=True, action_horizon=32 - Column names patched for LeRobot v2.1 format (state/actions/head_rgb/hand_rgb) - ProprioDropout(0.4) included in data transforms pipeline - Dataset: airoa/hsr (80K+ GIST-curated episodes, 93 SHTs) --- src/openpi/training/config.py | 351 ++++++++++++++++++++++++++++++++-- 1 file changed, 334 insertions(+), 17 deletions(-) diff --git a/src/openpi/training/config.py b/src/openpi/training/config.py index 4ca47e1286..cc754073f9 100644 --- a/src/openpi/training/config.py +++ b/src/openpi/training/config.py @@ -20,10 +20,10 @@ import openpi.policies.aloha_policy as aloha_policy import openpi.policies.droid_policy as droid_policy import openpi.policies.libero_policy as libero_policy +import openpi.policies.hsr_policy as hsr_policy import openpi.shared.download as _download import openpi.shared.normalize as _normalize import openpi.training.droid_rlds_dataset as droid_rlds_dataset -import openpi.training.misc.polaris_config as polaris_config import openpi.training.misc.roboarena_config as roboarena_config import openpi.training.optimizer as _optimizer import openpi.training.weight_loaders as weight_loaders @@ -94,8 +94,8 @@ class DataConfig: rlds_data_dir: str | None = None # Action space for DROID dataset. 
action_space: droid_rlds_dataset.DroidActionSpace | None = None - # List of datasets to sample from: name, version, weight, and optionally filter_dict_path - datasets: Sequence[droid_rlds_dataset.RLDSDataset] = () + # Path to the data filter file for DROID dataset + filter_dict_path: str | None = None class GroupFactory(Protocol): @@ -367,16 +367,8 @@ class RLDSDroidDataConfig(DataConfigFactory): # Filtering options. Can pass a path to a dictionary that maps episodes to timestep ranges # to tuples denoting ranges of time steps to keep (start, end). Episodes are uniquely identified with # f"{recording_folderpath}--{file_path}", both of which are present in the RLDS episode metadata. - - # List of datasets to sample from: name, version, weight, and optionally filter_dict_path - datasets: Sequence[droid_rlds_dataset.RLDSDataset] = ( - droid_rlds_dataset.RLDSDataset( - name="droid", - version="1.0.1", - weight=1.0, - filter_dict_path="gs://openpi-assets/droid/droid_sample_ranges_v1_0_1.json", - ), - ) + # Path to the filter dictionary file. + filter_dict_path: str | None = "gs://openpi-assets/droid/droid_sample_ranges_v1_0_1.json" @override def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig: @@ -419,7 +411,7 @@ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig model_transforms=model_transforms, rlds_data_dir=self.rlds_data_dir, action_space=self.action_space, - datasets=self.datasets, + filter_dict_path=self.filter_dict_path, ) @@ -460,7 +452,155 @@ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig data_transforms=data_transforms, model_transforms=model_transforms, ) + +@dataclasses.dataclass(frozen=True) +class LeRobotHSRDataConfig(DataConfigFactory): + # If provided, will be injected into the input data if the "prompt" key is not present. 
+ default_prompt: str | None = None + + # If true, this will convert the joint and gripper values from the HSR space to + # the space used by the pi internal runtime (trossen mobile) which was used to train the base model. People who + # use the HSR data should set this to true. + adapt_to_pi: bool = True + + # Action keys that will be used to read the action sequence from the dataset. + action_sequence_keys: Sequence[str] = ("action.state_diff", "action.relative") + + # Select which action source to use. + # - "relative": use only action.relative + # - "absolute_arm_head_relative_gripper_base": use arm/head from action.absolute and gripper/base from action.relative + # - "state_diff_arm_head_relative_gripper_base": use arm/head from action.state_diff and gripper/base from action.relative + action_mode: str = "relative" + + # If true, apply gripper conversion between HSR and pi0 angular space. + convert_gripper: bool = False + + # Base action dimension appended from action.relative when action_mode is state_diff_with_base. 
+ base_action_dim: int = 3 + + @override + def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig: + + if self.action_mode == "relative": + action_sequence_keys = ("actions",) + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "head_rgb", + "hand_rgb": "hand_rgb", + "state": "state", + "actions": "actions", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ), + hsr_policy.ProprioDropout(drop_rate=0.4), + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + elif self.action_mode == "absolute_arm_head_relative_gripper_base": + action_sequence_keys = ("action.absolute", "action.relative") + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "observation.image.head", + "hand_rgb": "observation.image.hand", + "state": "observation.state", + "actions_absolute": "action.absolute", + "actions_relative": "action.relative", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + _transforms.CombineStateDiffArmHeadRelativeGripperBase( + state_diff_key="actions_absolute", + relative_key="actions_relative", + base_dim=self.base_action_dim, + ), + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + elif self.action_mode == "state_diff_arm_head_relative_gripper_base": + action_sequence_keys = ("action.state_diff", "action.relative") + repack_transform = _transforms.Group( + inputs=[ + _transforms.RepackTransform( + { + "head_rgb": "observation.image.head", + 
"hand_rgb": "observation.image.hand", + "state": "observation.state", + "actions_state_diff": "action.state_diff", + "actions_relative": "action.relative", + "prompt": "prompt", + } + ) + ] + ) + data_transforms = _transforms.Group( + inputs=[ + _transforms.CombineStateDiffArmHeadRelativeGripperBase( + base_dim=self.base_action_dim + ), + hsr_policy.HSRInputs( + action_dim=model_config.action_dim, + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ), + ], + outputs=[ + hsr_policy.HSROutputs( + adapt_to_pi=self.adapt_to_pi, + convert_gripper=self.convert_gripper, + ) + ], + ) + else: + raise ValueError( + "Invalid action_mode. Expected 'relative', " + "'absolute_arm_head_relative_gripper_base', or " + "'state_diff_arm_head_relative_gripper_base'." + ) + + # Prepare data for policy training + # Convert images to uint8 numpy arrays, add masks + # Model transforms include things like tokenizing the prompt and action targets + model_transforms = ModelTransformFactory(default_prompt=self.default_prompt)(model_config) + return dataclasses.replace( + self.create_base_config(assets_dirs,model_config=model_config), + repack_transforms=repack_transform, + data_transforms=data_transforms, + model_transforms=model_transforms, + action_sequence_keys=action_sequence_keys, + ) @dataclasses.dataclass(frozen=True) class TrainConfig: @@ -484,6 +624,9 @@ class TrainConfig: # Precision for PyTorch training. pytorch_training_precision: Literal["bfloat16", "float32"] = "bfloat16" + + # sample the first batch and send to the wandb + pytorch_sample_data: bool = False lr_schedule: _optimizer.LRScheduleConfig = dataclasses.field(default_factory=_optimizer.CosineDecaySchedule) optimizer: _optimizer.OptimizerConfig = dataclasses.field(default_factory=_optimizer.AdamW) @@ -675,6 +818,25 @@ def __post_init__(self) -> None: # Check the base TrainConfig class for a full list of available hyperparameters. 
num_train_steps=30_000, ), + TrainConfig( + name="pi0_hsr", + model=pi0_config.Pi0Config(paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m_lora"), + data=LeRobotHSRDataConfig( + repo_id="processed/2025-05-06-07-v3.1-success-only", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("s3://openpi-assets/checkpoints/pi0_base/params"), + freeze_filter=pi0_config.Pi0Config( + paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m" + ).get_freeze_filter(), + ema_decay=None, + num_workers=8, + batch_size=256, + num_train_steps=200_000, + pytorch_weight_path="/home/user_00103_25b505/shared-storage/dev/models/pi0", + ), TrainConfig( name="pi0_libero_low_mem_finetune", # Here is an example of loading a pi0 model for LoRA fine-tuning. @@ -740,6 +902,7 @@ def __post_init__(self) -> None: # Turn off EMA for LoRA finetuning. ema_decay=None, ), + TrainConfig( name="pi05_libero", model=pi0_config.Pi0Config(pi05=True, action_horizon=10, discrete_state_input=False), @@ -765,7 +928,7 @@ def __post_init__(self) -> None: # Fine-tuning Aloha configs. # # This is a test config that is used to illustate how train on a custom LeRobot dataset. 
- # For instructions on how to convert and train on your own Aloha dataset see examples/aloha_real/README.md
+ # For instructions on how to convert and train on your own Aloha dataset see examples/aloha_real/README.md
 TrainConfig(
 name="pi0_aloha_pen_uncap",
 model=pi0_config.Pi0Config(),
@@ -916,6 +1079,128 @@ def __post_init__(self) -> None:
 num_train_steps=20_000,
 batch_size=32,
 ),
+ TrainConfig(
+ name="pi05_hsr",
+ model=pi0_config.Pi0Config(
+ pi05=True,
+ action_dim=32, # pi05 is trained with 32-dim actions
+ action_horizon=16,
+ ),
+ data=LeRobotHSRDataConfig(
+ repo_id="processed/2025-05-06-07-v3.1-success-only",
+ base_config=DataConfig(
+ prompt_from_task=True,
+ ),
+ ),
+ weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"),
+ num_train_steps=200_000,
+ batch_size=512,
+ num_workers=8, # Increase num_workers to speed up data loading with larger datasets.
+ pytorch_weight_path="/home/user_00103_25b505/shared-storage/dev/models/pi05",
+ ),
+ #
+ # HSR AIRoA — Aligned with eval repo (adapt_to_pi, action_horizon=32, LR decay).
+ # + TrainConfig( + name="pi05_hsr_airoa", + model=pi0_config.Pi0Config( + pi05=True, + action_dim=32, + action_horizon=32, + ), + data=LeRobotHSRDataConfig( + repo_id="airoa/hsr", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( + warmup_steps=2_000, + peak_lr=5e-5, + decay_steps=50_000, + decay_lr=5e-6, + ), + optimizer=_optimizer.AdamW(clip_gradient_norm=1.0), + ema_decay=0.99, + num_train_steps=50_000, + batch_size=32, + num_workers=16, + log_interval=100, + save_interval=5000, + keep_period=10_000, + wandb_enabled=False, + ), + TrainConfig( + name="pi0_task8", + model=pi0_config.Pi0Config(paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m_lora"), + data=LeRobotHSRDataConfig( + repo_id="lerobot_datasets/task8", + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi0_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( # batch 512 + warmup_steps=1_000, + peak_lr=1.0e-4, # 2.5e-5 × √16 = 1.0e-4 + decay_steps=80_000, + decay_lr=1.0e-5, # 2.5e-6 × √16 = 1.0e-5 + ), + batch_size=512, + num_workers=16, + num_train_steps=80_000, + # lr_schedule=_optimizer.CosineDecaySchedule( # batch 128 + # warmup_steps=1_000, + # peak_lr=5.0e-5, # 2.5e-5 × 2 = 5.0e-5 + # decay_steps=320_000, # Match num_train_steps. 
+ # decay_lr=5.0e-6, # 2.5e-6 × 2 = 5.0e-6 + # ), + # batch_size=128, + # num_workers=4, + # num_train_steps=320_000, + freeze_filter=pi0_config.Pi0Config( + paligemma_variant="gemma_2b_lora", action_expert_variant="gemma_300m" + ).get_freeze_filter(), + ema_decay=None, + ), + TrainConfig( + name="pi05_task8", + model=pi0_config.Pi0Config( + pi05=True, + action_dim=32, # pi05 is trained with 32-dim actions + action_horizon=16, + ), + data=LeRobotHSRDataConfig( + repo_id="lerobot_datasets/task8", + assets=AssetsConfig( + assets_dir="./assets/pi05_task8", + asset_id="lerobot_datasets/task8", + ), + base_config=DataConfig( + prompt_from_task=True, + ), + ), + weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"), + lr_schedule=_optimizer.CosineDecaySchedule( # batch 512 + warmup_steps=1_000, + peak_lr=1.0e-4, # 2.5e-5 × √16 = 1.0e-4 + decay_steps=80_000, + decay_lr=1.0e-5, # 2.5e-6 × √16 = 1.0e-5 + ), + batch_size=512, + num_workers=16, + num_train_steps=80_000, + # lr_schedule=_optimizer.CosineDecaySchedule( # batch 128 + # warmup_steps=1_000, + # peak_lr=5.0e-5, # 2.5e-5 × 2 = 5.0e-5 + # decay_steps=320_000, # Match num_train_steps. + # decay_lr=5.0e-6, # 2.5e-6 × 2 = 5.0e-6 + # ), + # batch_size=128, + # num_workers=4, + # num_train_steps=320_000, + ), # # ALOHA Sim configs. This config is used to demonstrate how to train on a simple simulated environment. # @@ -965,9 +1250,41 @@ def __post_init__(self) -> None: exp_name="debug_pi05", wandb_enabled=False, ), - # RoboArena & PolaRiS configs. + # + # RoboArena configs. 
+ #
 *roboarena_config.get_roboarena_configs(),
- *polaris_config.get_polaris_configs(),
+ #
+ # Sample checkpoint config
+ #
+ TrainConfig(
+ name="pi05_hsr_task6891011_level12_v2.5_train_adaptive",
+ model=pi0_config.Pi0Config(
+ pi05=True,
+ action_dim=32, # pi05 is trained with 32-dim actions
+ action_horizon=16,
+ ),
+ data=LeRobotHSRDataConfig(
+ repo_id="lerobot_datasets/task6891011_level12_v2.5_train",
+ assets=AssetsConfig(
+ assets_dir="./assets/pi05_hsr_task6891011_level12_v2.5_train_adaptive",
+ asset_id="lerobot_datasets/task6891011_level12_v2.5_train",
+ ),
+ base_config=DataConfig(
+ prompt_from_task=True,
+ ),
+ ),
+ weight_loader=weight_loaders.CheckpointWeightLoader("gs://openpi-assets/checkpoints/pi05_base/params"),
+ lr_schedule=_optimizer.CosineDecaySchedule( # batch 64
+ warmup_steps=1_000,
+ peak_lr=3.5e-5, # 2.5e-5 × √2 = 3.5e-5
+ decay_steps=1_300_000, # Match num_train_steps.
+ decay_lr=3.5e-6, # 2.5e-6 × √2 = 3.5e-6
+ ),
+ batch_size=64,
+ num_workers=8,
+ num_train_steps=1_300_000,
+ ),
 ]

 if len({config.name for config in _CONFIGS}) != len(_CONFIGS):

From be31f6a7109a50d3faede1928718c5f757f86c63 Mon Sep 17 00:00:00 2001
From: Mika
Date: Tue, 3 Mar 2026 08:33:49 +0000
Subject: [PATCH 3/4] fix(data): use pyav video backend instead of torchcodec

torchcodec requires system FFmpeg libraries (libavutil.so) which are not
available on the training cluster. pyav ships self-contained wheels with
bundled FFmpeg, so it works without extra system dependencies.
--- src/openpi/training/data_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openpi/training/data_loader.py b/src/openpi/training/data_loader.py index e2ee7dd06b..cbf92bc962 100644 --- a/src/openpi/training/data_loader.py +++ b/src/openpi/training/data_loader.py @@ -143,6 +143,7 @@ def create_torch_dataset( delta_timestamps={ key: [t / dataset_meta.fps for t in range(action_horizon)] for key in data_config.action_sequence_keys }, + video_backend="pyav", ) if data_config.prompt_from_task: From b2e41dfbf9f173922b8d0bf6707dd0d017a7bed0 Mon Sep 17 00:00:00 2001 From: Mika Date: Tue, 3 Mar 2026 08:34:04 +0000 Subject: [PATCH 4/4] fix(train): add logging.info for loss/grad_norm visibility in log files tqdm's pbar.write() output is swallowed when stdout is redirected to a file. Adding logging.info() ensures Step N: grad_norm=X, loss=Y, param_norm=Z appears in the training log for monitoring and post-hoc analysis. --- scripts/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/train.py b/scripts/train.py index 5d289413ab..43c1bd22ca 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -265,6 +265,7 @@ def main(config: _config.TrainConfig): reduced_info = jax.device_get(jax.tree.map(jnp.mean, stacked_infos)) info_str = ", ".join(f"{k}={v:.4f}" for k, v in reduced_info.items()) pbar.write(f"Step {step}: {info_str}") + logging.info(f"Step {step}: {info_str}") wandb.log(reduced_info, step=step) infos = [] batch = next(data_iter)